Ejemplo n.º 1
0
        /// <summary>
        /// Scores the train and unknown cells with NMF, passes those scores to OMF to predict
        /// the unknown ratings, logs RMSE/MAE (and optional top-N metrics), and saves the OMF
        /// distribution to the "RatingOMF_" data file when that file does not exist yet.
        /// </summary>
        /// <param name="maxEpoch">Forwarded to <c>NMF.PredictRatings</c>.</param>
        /// <param name="learnRate">Forwarded to <c>NMF.PredictRatings</c>.</param>
        /// <param name="regularization">Forwarded to <c>NMF.PredictRatings</c>.</param>
        /// <param name="factorCount">Forwarded to <c>NMF.PredictRatings</c>.</param>
        /// <param name="quantizer">Forwarded to <c>OMF.PredictRatings</c>.</param>
        /// <param name="topN">Length of the recommendation lists to evaluate; 0 skips the top-N evaluation.</param>
        /// <returns>The log text collected during the run.</returns>
        public string RunNMFbasedOMF(int maxEpoch, double learnRate, double regularization, int factorCount,
                                     List<double> quantizer, int topN = 0)
        {
            if (!ReadyForNumerical)
            {
                GetReadyForNumerical();
            }

            var report = new StringBuilder();
            report.AppendLine(Utils.PrintHeading("NMF based OMF"));

            // Stage 1 - NMF as scorer.
            // The scorer must produce values for the unknown cells as well as the train cells,
            // so both index sets are merged into one target matrix.
            var scoringTargets = new DataMatrix(R_unknown.UserCount, R_unknown.ItemCount);
            scoringTargets.MergeNonOverlap(R_unknown);
            scoringTargets.MergeNonOverlap(R_train.IndexesOfNonZeroElements());

            Utils.StartTimer();
            DataMatrix nmfScores = NMF.PredictRatings(R_train, scoringTargets, maxEpoch,
                                                      learnRate, regularization, factorCount);
            report.AppendLine(Utils.StopTimer());

            // Stage 2 - OMF on top of the NMF scores.
            report.AppendLine(Utils.PrintHeading("Ordinal Matrix Factorization with NMF as scorer"));
            Utils.StartTimer();
            DataMatrix omfPredictions;
            Dictionary<Tuple<int, int>, List<double>> distributionByUserItem;
            var omfLog = OMF.PredictRatings(R_train.Matrix, R_unknown.Matrix, nmfScores.Matrix,
                                            quantizer, out omfPredictions, out distributionByUserItem);
            report.AppendLine(omfLog);
            report.AppendLine(Utils.StopTimer());

            // Stage 3 - rating accuracy against the test set.
            var rmse = RMSE.Evaluate(R_test, omfPredictions);
            report.AppendLine(Utils.PrintValue("RMSE", rmse.ToString("0.0000")));
            var mae = MAE.Evaluate(R_test, omfPredictions);
            report.AppendLine(Utils.PrintValue("MAE", mae.ToString("0.0000")));

            // Stage 4 - optional ranking quality; all NCDG lines come before all MAP lines.
            if (topN != 0)
            {
                var recommendations = ItemRecommendationCore.GetTopNItemsByUser(omfPredictions, topN);

                for (int cutoff = 1; cutoff <= topN; cutoff++)
                {
                    var ncdg = NCDG.Evaluate(RelevantItemsByUser, recommendations, cutoff);
                    report.AppendLine(Utils.PrintValue("NCDG@" + cutoff, ncdg.ToString("0.0000")));
                }

                for (int cutoff = 1; cutoff <= topN; cutoff++)
                {
                    var map = MAP.Evaluate(RelevantItemsByUser, recommendations, cutoff);
                    report.AppendLine(Utils.PrintValue("MAP@" + cutoff, map.ToString("0.0000")));
                }
            }

            // Stage 5 - persist the distribution, but never overwrite an existing file.
            bool distributionAlreadySaved = File.Exists(GetDataFileName("RatingOMF_"));
            if (!distributionAlreadySaved)
            {
                Utils.IO<Dictionary<Tuple<int, int>, List<double>>>.SaveObject(
                    distributionByUserItem, GetDataFileName("RatingOMF_"));
            }

            return report.ToString();
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Predicts the unknown cells with the preference-relation based user KNN and
        /// evaluates the resulting top-N recommendation lists with NCDG and MAP.
        /// </summary>
        /// <param name="neighborCount">Forwarded to <c>PrefUserKNN.PredictRatings</c>.</param>
        /// <param name="topN">Length of the recommendation lists; metrics are logged for every cutoff 1..topN.</param>
        /// <returns>The log text collected during the run.</returns>
        public string RunPrefKNN(int neighborCount, int topN = 10)
        {
            if (!ReadyForOrdinal)
            {
                GetReadyForOrdinal();
            }
            StringBuilder log = new StringBuilder();

            log.AppendLine(Utils.PrintHeading("PrefKNN"));

            // Prediction
            Utils.StartTimer();
            DataMatrix R_predicted = PrefUserKNN.PredictRatings(PR_train, R_unknown, neighborCount, UserSimilaritiesOfPref);

            log.AppendLine(Utils.StopTimer());

            // TopN Evaluation
            var topNItemsByUser = ItemRecommendationCore.GetTopNItemsByUser(R_predicted, topN);

            for (int n = 1; n <= topN; n++)
            {
                // The formatted NCDG line must be appended to the log; previously its
                // return value was discarded, so the returned log contained MAP values only.
                log.AppendLine(Utils.PrintValue("NCDG@" + n, NCDG.Evaluate(RelevantItemsByUser, topNItemsByUser, n).ToString("0.0000")));
            }
            for (int n = 1; n <= topN; n++)
            {
                log.AppendLine(Utils.PrintValue("MAP@" + n, MAP.Evaluate(RelevantItemsByUser, topNItemsByUser, n).ToString("0.0000")));
            }

            return log.ToString();
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Predicts the unknown cells with the preference-relation based NMF and
        /// evaluates the resulting top-N recommendation lists with NCDG and MAP.
        /// </summary>
        /// <param name="maxEpoch">Forwarded to <c>PrefNMF.PredictRatings</c>.</param>
        /// <param name="learnRate">Forwarded to <c>PrefNMF.PredictRatings</c>.</param>
        /// <param name="regularizationOfUser">Forwarded to <c>PrefNMF.PredictRatings</c>.</param>
        /// <param name="regularizationOfItem">Forwarded to <c>PrefNMF.PredictRatings</c>.</param>
        /// <param name="factorCount">Forwarded to <c>PrefNMF.PredictRatings</c>.</param>
        /// <param name="topN">Length of the recommendation lists; metrics are logged for every cutoff 1..topN.</param>
        /// <returns>The log text collected during the run.</returns>
        public string RunPrefNMF(int maxEpoch, double learnRate, double regularizationOfUser,
                                 double regularizationOfItem, int factorCount, int topN = 10)
        {
            if (!ReadyForOrdinal)
            {
                GetReadyForOrdinal();
            }
            StringBuilder log = new StringBuilder();

            log.AppendLine(Utils.PrintHeading("PrefNMF"));

            // Prediction
            Utils.StartTimer();
            DataMatrix R_predicted = PrefNMF.PredictRatings(PR_train, R_unknown,
                                                            maxEpoch, learnRate, regularizationOfUser, regularizationOfItem, factorCount);

            log.AppendLine(Utils.StopTimer());

            // Evaluation
            var topNItemsByUser = ItemRecommendationCore.GetTopNItemsByUser(R_predicted, topN);

            for (int n = 1; n <= topN; n++)
            {
                // The formatted NCDG line must be appended to the log; previously its
                // return value was discarded, so the returned log contained MAP values only.
                log.AppendLine(Utils.PrintValue("NCDG@" + n, NCDG.Evaluate(RelevantItemsByUser, topNItemsByUser, n).ToString("0.0000")));
            }
            for (int n = 1; n <= topN; n++)
            {
                log.AppendLine(Utils.PrintValue("MAP@" + n, MAP.Evaluate(RelevantItemsByUser, topNItemsByUser, n).ToString("0.0000")));
            }

            return log.ToString();
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Runs ORF on the quantized position matrix of the training preference relations,
        /// using the OMF distribution stored in the "PrefOMF_" data file, and evaluates the
        /// top-N lists built from the predicted expectations with NCDG and MAP.
        /// </summary>
        /// <param name="regularization">Forwarded to <c>ORF.PredictRatings</c>.</param>
        /// <param name="learnRate">Forwarded to <c>ORF.PredictRatings</c>.</param>
        /// <param name="maxEpoch">Forwarded to <c>ORF.PredictRatings</c>.</param>
        /// <param name="quantizer">Quantization levels; its first and last entries and its count are used.</param>
        /// <param name="topN">Length of the recommendation lists; metrics are logged for every cutoff 1..topN.</param>
        /// <returns>
        /// "Abort, Run OMF first." when the "PrefOMF_" data file is missing; otherwise the collected log text.
        /// </returns>
        public string RunPrefMRF(double regularization, double learnRate, int maxEpoch, List<double> quantizer,
                                 int topN = 10)
        {
            // Guard: the OMF distribution must have been written to disk by an earlier run.
            if (!File.Exists(GetDataFileName("PrefOMF_")))
            {
                return "Abort, Run OMF first.";
            }

            Dictionary<Tuple<int, int>, List<double>> distributionByUserItem =
                Utils.IO<Dictionary<Tuple<int, int>, List<double>>>.LoadObject(GetDataFileName("PrefOMF_"));

            if (!ReadyForOrdinal)
            {
                GetReadyForOrdinal();
            }

            var report = new StringBuilder();
            report.AppendLine(Utils.PrintHeading("PrefMRF: PrefNMF based ORF"));

            // Prediction
            Utils.StartTimer();

            // Turn the training preference relations into a position matrix and quantize it
            // onto the levels in `quantizer` (offset = first level, range = last - first).
            var trainPositions = new DataMatrix(PR_train.GetPositionMatrix());
            double lowestLevel = quantizer[0];
            double levelRange = quantizer[quantizer.Count - 1] - quantizer[0];
            trainPositions.Quantization(lowestLevel, levelRange, quantizer);

            DataMatrix expectations;
            DataMatrix mostLikely; // required by the out parameter; not evaluated here
            var model = new ORF();
            model.PredictRatings(trainPositions, R_unknown, StrongSimilarityIndicatorsByItemPref,
                                 distributionByUserItem, regularization, learnRate, maxEpoch,
                                 quantizer.Count, out expectations, out mostLikely);

            report.AppendLine(Utils.StopTimer());

            // Evaluation: all NCDG lines first, then all MAP lines.
            var recommendations = ItemRecommendationCore.GetTopNItemsByUser(expectations, topN);

            for (int cutoff = 1; cutoff <= topN; cutoff++)
            {
                var ncdg = NCDG.Evaluate(RelevantItemsByUser, recommendations, cutoff);
                report.AppendLine(Utils.PrintValue("NCDG@" + cutoff, ncdg.ToString("0.0000")));
            }

            for (int cutoff = 1; cutoff <= topN; cutoff++)
            {
                var map = MAP.Evaluate(RelevantItemsByUser, recommendations, cutoff);
                report.AppendLine(Utils.PrintValue("MAP@" + cutoff, map.ToString("0.0000")));
            }

            return report.ToString();
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Runs ORF on the training ratings using the OMF distribution stored in the
        /// "RatingOMF_" data file, then logs RMSE, MAE and (optionally) top-N metrics.
        /// </summary>
        /// <param name="regularization">Forwarded to <c>ORF.PredictRatings</c>.</param>
        /// <param name="learnRate">Forwarded to <c>ORF.PredictRatings</c>.</param>
        /// <param name="maxEpoch">Forwarded to <c>ORF.PredictRatings</c>.</param>
        /// <param name="quantizer">Quantization levels; only its count is used here.</param>
        /// <param name="topN">Length of the recommendation lists to evaluate; 0 skips the top-N evaluation.</param>
        /// <returns>
        /// "Abort, Run OMF first." when the "RatingOMF_" data file is missing; otherwise the collected log text.
        /// </returns>
        public string RunNMFbasedORF(double regularization, double learnRate,
                                     int maxEpoch, List <double> quantizer, int topN = 0)
        {
            // Load OMFDistribution from file
            Dictionary <Tuple <int, int>, List <double> > OMFDistributionByUserItem;

            if (File.Exists(GetDataFileName("RatingOMF_")))
            {
                OMFDistributionByUserItem = Utils.IO <Dictionary <Tuple <int, int>, List <double> > > .LoadObject(GetDataFileName("RatingOMF_"));
            }
            else
            {
                return "Abort, Run OMF first.";
            }

            if (!ReadyForNumerical)
            {
                GetReadyForNumerical();
            }
            StringBuilder log = new StringBuilder();

            log.AppendLine(Utils.PrintHeading("NMF based ORF"));

            // Prediction
            Utils.StartTimer();
            DataMatrix R_predicted_expectations;
            DataMatrix R_predicted_mostlikely;
            ORF        orf = new ORF();

            orf.PredictRatings(R_train, R_unknown, StrongSimilarityIndicatorsByItemRating,
                               OMFDistributionByUserItem, regularization, learnRate, maxEpoch,
                               quantizer.Count, out R_predicted_expectations, out R_predicted_mostlikely);
            log.AppendLine(Utils.StopTimer());

            // Numerical Evaluation
            // RMSE is measured on the expected ratings, MAE on the most likely ratings.
            // The "MAE" line previously called RMSE.Evaluate, so it reported a mislabeled RMSE.
            log.AppendLine(Utils.PrintValue("RMSE", RMSE.Evaluate(R_test, R_predicted_expectations).ToString("0.0000")));
            log.AppendLine(Utils.PrintValue("MAE", MAE.Evaluate(R_test, R_predicted_mostlikely).ToString("0.0000")));

            // Top-N Evaluation
            if (topN != 0)
            {
                var topNItemsByUser_expectations = ItemRecommendationCore.GetTopNItemsByUser(R_predicted_expectations, topN);
                for (int n = 1; n <= topN; n++)
                {
                    log.AppendLine(Utils.PrintValue("NCDG@" + n, NCDG.Evaluate(RelevantItemsByUser, topNItemsByUser_expectations, n).ToString("0.0000")));
                }
                for (int n = 1; n <= topN; n++)
                {
                    log.AppendLine(Utils.PrintValue("MAP@" + n, MAP.Evaluate(RelevantItemsByUser, topNItemsByUser_expectations, n).ToString("0.0000")));
                }
            }

            return log.ToString();
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Rating based Non-negative Matrix Factorization: predicts the unknown cells from
        /// the training ratings and logs RMSE, MAE and (optionally) top-N metrics.
        /// </summary>
        /// <param name="maxEpoch">Forwarded to <c>NMF.PredictRatings</c>.</param>
        /// <param name="learnRate">Forwarded to <c>NMF.PredictRatings</c>.</param>
        /// <param name="regularization">Forwarded to <c>NMF.PredictRatings</c>.</param>
        /// <param name="factorCount">Forwarded to <c>NMF.PredictRatings</c>.</param>
        /// <param name="topN">Length of the recommendation lists to evaluate; 0 skips the top-N evaluation.</param>
        /// <returns>The log text collected during the run.</returns>
        public string RunNMF(int maxEpoch, double learnRate, double regularization,
                             int factorCount, int topN = 0)
        {
            if (!ReadyForNumerical)
            {
                GetReadyForNumerical();
            }

            var report = new StringBuilder();
            report.AppendLine(Utils.PrintHeading("NMF"));

            // Timed prediction step.
            Utils.StartTimer();
            DataMatrix predictions = NMF.PredictRatings(R_train, R_unknown, maxEpoch,
                                                        learnRate, regularization, factorCount);
            report.AppendLine(Utils.StopTimer());

            // Rating accuracy against the test set.
            var rmse = RMSE.Evaluate(R_test, predictions);
            report.AppendLine(Utils.PrintValue("RMSE", rmse.ToString("0.0000")));
            var mae = MAE.Evaluate(R_test, predictions);
            report.AppendLine(Utils.PrintValue("MAE", mae.ToString("0.0000")));

            // Ranking quality is optional; nothing else follows, so leave early.
            if (topN == 0)
            {
                return report.ToString();
            }

            var recommendations = ItemRecommendationCore.GetTopNItemsByUser(predictions, topN);

            for (int cutoff = 1; cutoff <= topN; cutoff++)
            {
                var ncdg = NCDG.Evaluate(RelevantItemsByUser, recommendations, cutoff);
                report.AppendLine(Utils.PrintValue("NCDG@" + cutoff, ncdg.ToString("0.0000")));
            }

            for (int cutoff = 1; cutoff <= topN; cutoff++)
            {
                var map = MAP.Evaluate(RelevantItemsByUser, recommendations, cutoff);
                report.AppendLine(Utils.PrintValue("MAP@" + cutoff, map.ToString("0.0000")));
            }

            return report.ToString();
        }
Ejemplo n.º 7
0
            /// <summary>
            /// Checks the top-2 item lists that <c>ItemRecommendationCore.GetTopNItemsByUser</c>
            /// returns for users 0, 1 and 4 of the sample rating matrix.
            /// </summary>
            public void GetTopNItemsByUser()
            {
                // arrange
                // Sample rating matrix (rows = users, columns = items):
                //   5  3  0  1
                //   4  0  0  1
                //   1  1  0  5
                //   1  0  0  4
                //   0  1  5  4
                DataMatrix ratings = GetSampleRatingMatrix();

                // Each row is { user index, rank within the top-N list, expected item index }.
                int[][] expectedPicks =
                {
                    new[] { 0, 0, 0 },
                    new[] { 0, 1, 1 },
                    new[] { 1, 0, 0 },
                    new[] { 1, 1, 3 },
                    new[] { 4, 0, 2 },
                    new[] { 4, 1, 3 },
                };

                // act
                Dictionary<int, List<int>> recommended = ItemRecommendationCore.GetTopNItemsByUser(ratings, 2);

                // assert
                // All lookups stay inside Debug.Assert so they are only evaluated in DEBUG builds.
                Debug.Assert(recommended[0].Count == 2);
                foreach (int[] pick in expectedPicks)
                {
                    Debug.Assert(recommended[pick[0]][pick[1]] == pick[2]);
                }
            }
Ejemplo n.º 8
0
        /// <summary>
        /// Recommend the most popular (measured by mean rating) items to all users:
        /// every unknown cell is filled with the training mean of its item, and the
        /// resulting top-N lists are evaluated with NCDG and MAP.
        /// </summary>
        /// <param name="topN">Length of the recommendation lists; metrics are logged for every cutoff 1..topN.</param>
        /// <returns>The log text collected during the run.</returns>
        public string RunMostPopular(int topN)
        {
            if (!ReadyForNumerical)
            {
                GetReadyForNumerical();
            }

            var report = new StringBuilder();
            report.AppendLine(Utils.PrintHeading("Most popular"));

            // Timed section: build the predictions and the top-N lists.
            Utils.StartTimer();
            var itemMeans = R_train.GetItemMeans();
            var predictions = new DataMatrix(R_unknown.UserCount, R_unknown.ItemCount);

            // Item1 is the user index and Item2 the item index of each enumerated cell.
            foreach (var cell in R_unknown.Matrix.EnumerateIndexed(Zeros.AllowSkip))
            {
                predictions[cell.Item1, cell.Item2] = itemMeans[cell.Item2];
            }

            var recommendations = ItemRecommendationCore.GetTopNItemsByUser(predictions, topN);
            report.AppendLine(Utils.StopTimer());

            // Ranking quality: all NCDG lines first, then all MAP lines.
            for (int cutoff = 1; cutoff <= topN; cutoff++)
            {
                var ncdg = NCDG.Evaluate(RelevantItemsByUser, recommendations, cutoff);
                report.AppendLine(Utils.PrintValue("NCDG@" + cutoff, ncdg.ToString("0.0000")));
            }

            for (int cutoff = 1; cutoff <= topN; cutoff++)
            {
                var map = MAP.Evaluate(RelevantItemsByUser, recommendations, cutoff);
                report.AppendLine(Utils.PrintValue("MAP@" + cutoff, map.ToString("0.0000")));
            }

            return report.ToString();
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Predicts the unknown cells with the rating based user KNN and logs RMSE, MAE
        /// and (optionally) top-N metrics.
        /// </summary>
        /// <param name="topN">Length of the recommendation lists to evaluate; 0 skips the top-N evaluation.</param>
        /// <returns>The log text collected during the run.</returns>
        public string RunUserKNN(int topN = 0)
        {
            if (!ReadyForNumerical)
            {
                GetReadyForNumerical();
            }
            StringBuilder log = new StringBuilder();

            log.AppendLine(Utils.PrintHeading("UserKNN"));

            // Prediction
            Utils.StartTimer();
            DataMatrix R_predicted = Numerical.UserKNN.PredictRatings(R_train, R_unknown, UserSimilaritiesOfRating, MaxCountOfNeighbors);

            log.AppendLine(Utils.StopTimer());

            // Numerical Evaluation
            log.AppendLine(Utils.PrintValue("RMSE", RMSE.Evaluate(R_test, R_predicted).ToString("0.0000")));
            log.AppendLine(Utils.PrintValue("MAE", MAE.Evaluate(R_test, R_predicted).ToString("0.0000")));

            // TopN Evaluation
            if (topN != 0)
            {
                var topNItemsByUser = ItemRecommendationCore.GetTopNItemsByUser(R_predicted, topN);
                for (int n = 1; n <= topN; n++)
                {
                    // The formatted NCDG line must be appended to the log; previously its
                    // return value was discarded, so the returned log contained MAP values only.
                    log.AppendLine(Utils.PrintValue("NCDG@" + n, NCDG.Evaluate(RelevantItemsByUser, topNItemsByUser, n).ToString("0.0000")));
                }
                for (int n = 1; n <= topN; n++)
                {
                    log.AppendLine(Utils.PrintValue("MAP@" + n, MAP.Evaluate(RelevantItemsByUser, topNItemsByUser, n).ToString("0.0000")));
                }
            }

            return log.ToString();
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Scores the train and unknown items of every training user with PrefNMF, passes
        /// those scores to OMF to predict the unknown cells, logs top-N metrics, and saves
        /// the OMF distribution to the "PrefOMF_" data file when that file does not exist yet.
        /// </summary>
        /// <param name="maxEpoch">Forwarded to <c>PrefNMF.PredictPrefRelations</c>.</param>
        /// <param name="learnRate">Forwarded to <c>PrefNMF.PredictPrefRelations</c>.</param>
        /// <param name="regularizationOfUser">Forwarded to <c>PrefNMF.PredictPrefRelations</c>.</param>
        /// <param name="regularizationOfItem">Forwarded to <c>PrefNMF.PredictPrefRelations</c>.</param>
        /// <param name="factorCount">Forwarded to <c>PrefNMF.PredictPrefRelations</c>.</param>
        /// <param name="quantizer">
        /// Quantization levels; forwarded to PrefNMF and OMF and used to quantize the train position matrix.
        /// </param>
        /// <param name="topN">Length of the recommendation lists; metrics are logged for every cutoff 1..topN.</param>
        /// <returns>The log text collected during the run.</returns>
        public string RunPrefNMFbasedOMF(int maxEpoch, double learnRate, double regularizationOfUser,
                                         double regularizationOfItem, int factorCount, List <double> quantizer, int topN)
        {
            if (!ReadyForOrdinal)
            {
                GetReadyForOrdinal();
            }
            StringBuilder log = new StringBuilder();

            log.AppendLine(Utils.PrintHeading("PrefNMF based OMF"));

            // =============PrefNMF prediction on Train+Unknown============
            // The scorer needs values for both the train and the unknown items of each user.
            // An earlier approach merged both index sets into one DataMatrix (R_all) and built
            // PrefRelations from it; that was far too slow, so a per-user item list is used instead.
            Dictionary <int, List <int> > ItemsByUser_train   = R_train.GetItemsByUser();
            Dictionary <int, List <int> > ItemsByUser_unknown = R_unknown.GetItemsByUser();

            // Build a fresh list per user. Copying the dictionary alone would share the inner
            // lists, so appending the unknown items would also modify ItemsByUser_train.
            Dictionary <int, List <int> > PR_unknown = new Dictionary <int, List <int> >(ItemsByUser_train.Count);

            foreach (var itemsOfUser in ItemsByUser_train)
            {
                List <int> targetItems = new List <int>(itemsOfUser.Value);
                List <int> unknownItems;

                // A user without any unknown item simply contributes only the train items
                // instead of raising KeyNotFoundException.
                if (ItemsByUser_unknown.TryGetValue(itemsOfUser.Key, out unknownItems))
                {
                    targetItems.AddRange(unknownItems);
                }
                PR_unknown[itemsOfUser.Key] = targetItems;
            }

            Utils.StartTimer();
            SparseMatrix PR_predicted = PrefNMF.PredictPrefRelations(PR_train, PR_unknown,
                                                                     maxEpoch, learnRate, regularizationOfUser, regularizationOfItem, factorCount, quantizer);

            // Both predicted and train need to be quantized, otherwise OMF won't accept them.
            // NOTE(review): the explicit quantization of PR_predicted was disabled in the original
            // code; presumably PredictPrefRelations already applies `quantizer` - confirm.
            DataMatrix R_predictedByPrefNMF = new DataMatrix(PR_predicted);

            // PR_train itself is already in quantized form; only its position matrix is
            // quantized here (offset = first level, range = last level - first level).
            DataMatrix R_train_positions = new DataMatrix(PR_train.GetPositionMatrix());

            R_train_positions.Quantization(quantizer[0], quantizer[quantizer.Count - 1] - quantizer[0], quantizer);
            log.AppendLine(Utils.StopTimer());

            // =============OMF prediction on Train+Unknown============
            log.AppendLine(Utils.PrintHeading("Ordinal Matrix Factorization with PrefNMF as scorer"));
            Utils.StartTimer();
            Dictionary <Tuple <int, int>, List <double> > OMFDistributionByUserItem;
            DataMatrix R_predicted;

            log.AppendLine(OMF.PredictRatings(R_train_positions.Matrix, R_unknown.Matrix, R_predictedByPrefNMF.Matrix,
                                              quantizer, out R_predicted, out OMFDistributionByUserItem));
            log.AppendLine(Utils.StopTimer());

            // TopN Evaluation
            var topNItemsByUser = ItemRecommendationCore.GetTopNItemsByUser(R_predicted, topN);

            for (int n = 1; n <= topN; n++)
            {
                log.AppendLine(Utils.PrintValue("NCDG@" + n, NCDG.Evaluate(RelevantItemsByUser, topNItemsByUser, n).ToString("0.0000")));
            }
            for (int n = 1; n <= topN; n++)
            {
                log.AppendLine(Utils.PrintValue("MAP@" + n, MAP.Evaluate(RelevantItemsByUser, topNItemsByUser, n).ToString("0.0000")));
            }

            // Save OMFDistribution to file, but never overwrite an existing one
            if (!File.Exists(GetDataFileName("PrefOMF_")))
            {
                Utils.IO <Dictionary <Tuple <int, int>, List <double> > > .SaveObject(OMFDistributionByUserItem, GetDataFileName("PrefOMF_"));
            }

            return log.ToString();
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Prepares everything the rating based (numerical) experiments read: the train/test
        /// split, the unknown-cell index matrix, the relevant items per user and the rating
        /// based similarity data, which is loaded from the data files when all three exist
        /// and computed otherwise.
        /// </summary>
        /// <param name="saveLoadedData">When true, freshly computed similarity data is saved to the data files.</param>
        /// <returns>
        /// "Is ready." when the preparation was already done; otherwise the log text collected
        /// while loading the data set (the similarity steps are not added to this log).
        /// </returns>
        public string GetReadyForNumerical(bool saveLoadedData = true)
        {
            if (ReadyForNumerical)
            {
                return "Is ready.";
            }

            var report = new StringBuilder();

            Utils.StartTimer();

            // Split the data set into train and test matrices.
            report.AppendLine(Utils.PrintHeading("Create R_train/R_test sets from " + DataSetFile));
            Utils.LoadMovieLensSplitByCount(DataSetFile, out R_train, out R_test,
                                            MinCountOfRatings, MaxCountOfRatings,
                                            CountOfRatingsForTrain, ShuffleData, Seed);

            // The briefs go to the console first and are then recorded in the log.
            Console.WriteLine(R_train.DatasetBrief("Train set"));
            Console.WriteLine(R_test.DatasetBrief("Test set"));
            report.AppendLine(R_train.DatasetBrief("Train set"));
            report.AppendLine(R_test.DatasetBrief("Test set"));

            // The cells to predict are exactly the non-zero cells of the test set.
            R_unknown = R_test.IndexesOfNonZeroElements();

            report.AppendLine(Utils.PrintValue("Relevant item criteria", RelevantItemCriteria.ToString("0.0")));
            RelevantItemsByUser = ItemRecommendationCore.GetRelevantItemsByUser(R_test, RelevantItemCriteria);
            double meanRelevantCount = RelevantItemsByUser.Average(entry => entry.Value.Count);
            report.AppendLine(Utils.PrintValue("Mean # of relevant items per user", meanRelevantCount.ToString("0")));
            report.AppendLine(Utils.StopTimer());

            #region Prepare similarity data
            // The stored data is only used when all three files are present.
            bool similarityFilesPresent = File.Exists(GetDataFileName("USR"))
                                          && File.Exists(GetDataFileName("ISR"))
                                          && File.Exists(GetDataFileName("SSIIR"));

            if (!similarityFilesPresent)
            {
                // User-user similarities.
                Utils.StartTimer();
                Utils.PrintHeading("Compute user-user similarities (rating based)");
                Metric.GetPearsonOfRows(R_train, MaxCountOfNeighbors, StrongSimilarityThreshold,
                                        out UserSimilaritiesOfRating);
                if (saveLoadedData)
                {
                    Utils.IO<SimilarityData>.SaveObject(UserSimilaritiesOfRating, GetDataFileName("USR"));
                }
                Utils.StopTimer();

                // Item-item similarities together with the strong similarity indicators.
                Utils.StartTimer();
                Utils.PrintHeading("Compute item-item similarities (rating based)");
                Metric.GetPearsonOfColumns(R_train, MaxCountOfNeighbors, StrongSimilarityThreshold,
                                           out ItemSimilaritiesOfRating, out StrongSimilarityIndicatorsByItemRating);
                if (saveLoadedData)
                {
                    Utils.IO<SimilarityData>.SaveObject(ItemSimilaritiesOfRating, GetDataFileName("ISR"));
                    Utils.IO<HashSet<Tuple<int, int>>>.SaveObject(StrongSimilarityIndicatorsByItemRating,
                                                                  GetDataFileName("SSIIR"));
                }
                Utils.StopTimer();
            }
            else
            {
                Utils.StartTimer();
                Utils.PrintHeading("Load user-user similarities (rating based)");
                UserSimilaritiesOfRating = Utils.IO<SimilarityData>.LoadObject(GetDataFileName("USR"));
                Utils.StopTimer();

                Utils.StartTimer();
                Utils.PrintHeading("Load item-item similarities (rating based)");
                ItemSimilaritiesOfRating = Utils.IO<SimilarityData>.LoadObject(GetDataFileName("ISR"));
                Utils.StopTimer();

                Utils.StartTimer();
                Utils.PrintHeading("Load item-item strong similarity indicators (rating based)");
                StrongSimilarityIndicatorsByItemRating =
                    Utils.IO<HashSet<Tuple<int, int>>>.LoadObject(GetDataFileName("SSIIR"));
                Utils.StopTimer();
            }
            #endregion

            ReadyForNumerical = true;

            return report.ToString();
        }