/// <summary>
/// Runs Ordinal Matrix Factorization (OMF) with rating-based NMF as the scorer.
/// NMF first scores every train and unknown cell, OMF then turns those scores
/// into predictions for the unknown cells, and the result is evaluated with
/// RMSE/MAE and, when requested, NCDG@n and MAP@n.
/// </summary>
/// <param name="maxEpoch">Passed through to NMF.PredictRatings.</param>
/// <param name="learnRate">Passed through to NMF.PredictRatings.</param>
/// <param name="regularization">Passed through to NMF.PredictRatings.</param>
/// <param name="factorCount">Passed through to NMF.PredictRatings.</param>
/// <param name="quantizer">Passed through to OMF.PredictRatings.</param>
/// <param name="topN">Largest cut-off for the top-N metrics; 0 skips top-N evaluation.</param>
/// <returns>The accumulated log text of the run.</returns>
public string RunNMFbasedOMF(int maxEpoch, double learnRate, double regularization, int factorCount, List<double> quantizer, int topN = 0)
{
    if (!ReadyForNumerical)
    {
        GetReadyForNumerical();
    }

    var report = new StringBuilder();
    report.AppendLine(Utils.PrintHeading("NMF based OMF"));

    // Stage 1: NMF scorer.
    // The scorer must produce a value for every cell OMF will look at, so the
    // target matrix holds the indexes of both the unknown and the train ratings.
    var scoringTargets = new DataMatrix(R_unknown.UserCount, R_unknown.ItemCount);
    scoringTargets.MergeNonOverlap(R_unknown);
    scoringTargets.MergeNonOverlap(R_train.IndexesOfNonZeroElements());

    Utils.StartTimer();
    DataMatrix nmfScores = NMF.PredictRatings(R_train, scoringTargets, maxEpoch, learnRate, regularization, factorCount);
    report.AppendLine(Utils.StopTimer());

    // Stage 2: OMF on top of the NMF scores.
    report.AppendLine(Utils.PrintHeading("Ordinal Matrix Factorization with NMF as scorer"));
    Utils.StartTimer();
    Dictionary<Tuple<int, int>, List<double>> distributionByUserItem;
    DataMatrix omfPredictions;
    string omfLog = OMF.PredictRatings(R_train.Matrix, R_unknown.Matrix, nmfScores.Matrix, quantizer, out omfPredictions, out distributionByUserItem);
    report.AppendLine(omfLog);
    report.AppendLine(Utils.StopTimer());

    // Stage 3: rating-accuracy metrics.
    string rmseText = RMSE.Evaluate(R_test, omfPredictions).ToString("0.0000");
    report.AppendLine(Utils.PrintValue("RMSE", rmseText));
    string maeText = MAE.Evaluate(R_test, omfPredictions).ToString("0.0000");
    report.AppendLine(Utils.PrintValue("MAE", maeText));

    // Stage 4: ranking metrics, only when a cut-off was requested.
    if (topN != 0)
    {
        var rankedItemsByUser = ItemRecommendationCore.GetTopNItemsByUser(omfPredictions, topN);

        for (int cutoff = 1; cutoff <= topN; ++cutoff)
        {
            string ncdgText = NCDG.Evaluate(RelevantItemsByUser, rankedItemsByUser, cutoff).ToString("0.0000");
            report.AppendLine(Utils.PrintValue("NCDG@" + cutoff, ncdgText));
        }

        for (int cutoff = 1; cutoff <= topN; ++cutoff)
        {
            string mapText = MAP.Evaluate(RelevantItemsByUser, rankedItemsByUser, cutoff).ToString("0.0000");
            report.AppendLine(Utils.PrintValue("MAP@" + cutoff, mapText));
        }
    }

    // Stage 5: persist the OMF distribution, but never overwrite an existing file.
    bool distributionFileExists = File.Exists(GetDataFileName("RatingOMF_"));
    if (!distributionFileExists)
    {
        Utils.IO<Dictionary<Tuple<int, int>, List<double>>>.SaveObject(distributionByUserItem, GetDataFileName("RatingOMF_"));
    }

    return report.ToString();
}
/// <summary>
/// Runs the preference-relation based user KNN recommender: predicts scores for
/// the cells of R_unknown from PR_train using PrefUserKNN and the
/// UserSimilaritiesOfPref data, then evaluates NCDG@n and MAP@n for n = 1..topN.
/// </summary>
/// <param name="neighborCount">Passed through to PrefUserKNN.PredictRatings.</param>
/// <param name="topN">Largest cut-off used for the top-N metrics.</param>
/// <returns>The accumulated log text of the run, including both NCDG and MAP lines.</returns>
public string RunPrefKNN(int neighborCount, int topN = 10)
{
    if (!ReadyForOrdinal)
    {
        GetReadyForOrdinal();
    }

    StringBuilder log = new StringBuilder();
    log.AppendLine(Utils.PrintHeading("PrefKNN"));

    // Prediction
    Utils.StartTimer();
    DataMatrix R_predicted = PrefUserKNN.PredictRatings(PR_train, R_unknown, neighborCount, UserSimilaritiesOfPref);
    log.AppendLine(Utils.StopTimer());

    // TopN Evaluation
    var topNItemsByUser = ItemRecommendationCore.GetTopNItemsByUser(R_predicted, topN);
    for (int n = 1; n <= topN; n++)
    {
        // The NCDG line used to be computed and then dropped; append it to the
        // log like the MAP lines below so the returned report is complete.
        log.AppendLine(Utils.PrintValue("NCDG@" + n, NCDG.Evaluate(RelevantItemsByUser, topNItemsByUser, n).ToString("0.0000")));
    }
    for (int n = 1; n <= topN; n++)
    {
        log.AppendLine(Utils.PrintValue("MAP@" + n, MAP.Evaluate(RelevantItemsByUser, topNItemsByUser, n).ToString("0.0000")));
    }

    return log.ToString();
}
/// <summary>
/// Runs the preference-relation based NMF recommender: predicts scores for the
/// cells of R_unknown from PR_train using PrefNMF, then evaluates NCDG@n and
/// MAP@n for n = 1..topN.
/// </summary>
/// <param name="maxEpoch">Passed through to PrefNMF.PredictRatings.</param>
/// <param name="learnRate">Passed through to PrefNMF.PredictRatings.</param>
/// <param name="regularizationOfUser">Passed through to PrefNMF.PredictRatings.</param>
/// <param name="regularizationOfItem">Passed through to PrefNMF.PredictRatings.</param>
/// <param name="factorCount">Passed through to PrefNMF.PredictRatings.</param>
/// <param name="topN">Largest cut-off used for the top-N metrics.</param>
/// <returns>The accumulated log text of the run, including both NCDG and MAP lines.</returns>
public string RunPrefNMF(int maxEpoch, double learnRate, double regularizationOfUser, double regularizationOfItem, int factorCount, int topN = 10)
{
    if (!ReadyForOrdinal)
    {
        GetReadyForOrdinal();
    }

    StringBuilder log = new StringBuilder();
    log.AppendLine(Utils.PrintHeading("PrefNMF"));

    // Prediction
    Utils.StartTimer();
    DataMatrix R_predicted = PrefNMF.PredictRatings(PR_train, R_unknown, maxEpoch, learnRate, regularizationOfUser, regularizationOfItem, factorCount);
    log.AppendLine(Utils.StopTimer());

    // Evaluation
    var topNItemsByUser = ItemRecommendationCore.GetTopNItemsByUser(R_predicted, topN);
    for (int n = 1; n <= topN; n++)
    {
        // The NCDG line used to be computed and then dropped; append it to the
        // log like the MAP lines below so the returned report is complete.
        log.AppendLine(Utils.PrintValue("NCDG@" + n, NCDG.Evaluate(RelevantItemsByUser, topNItemsByUser, n).ToString("0.0000")));
    }
    for (int n = 1; n <= topN; n++)
    {
        log.AppendLine(Utils.PrintValue("MAP@" + n, MAP.Evaluate(RelevantItemsByUser, topNItemsByUser, n).ToString("0.0000")));
    }

    return log.ToString();
}
/// <summary>
/// Runs PrefMRF (an ORF model on top of the PrefNMF-based OMF distribution).
/// Requires the distribution file named by GetDataFileName("PrefOMF_"); if it
/// is missing, nothing is computed and an abort message is returned.
/// </summary>
/// <param name="regularization">Passed through to ORF.PredictRatings.</param>
/// <param name="learnRate">Passed through to ORF.PredictRatings.</param>
/// <param name="maxEpoch">Passed through to ORF.PredictRatings.</param>
/// <param name="quantizer">Quantization levels; its first and last entries define the range used to quantize the training positions, and its Count is passed to ORF.</param>
/// <param name="topN">Largest cut-off used for the top-N metrics.</param>
/// <returns>The accumulated log text, or "Abort, Run OMF first." when the distribution file is absent.</returns>
public string RunPrefMRF(double regularization, double learnRate, int maxEpoch, List<double> quantizer, int topN = 10)
{
    // The OMF distribution is a prerequisite: bail out before doing anything else.
    if (!File.Exists(GetDataFileName("PrefOMF_")))
    {
        return "Abort, Run OMF first.";
    }

    Dictionary<Tuple<int, int>, List<double>> distributionByUserItem =
        Utils.IO<Dictionary<Tuple<int, int>, List<double>>>.LoadObject(GetDataFileName("PrefOMF_"));

    if (!ReadyForOrdinal)
    {
        GetReadyForOrdinal();
    }

    var report = new StringBuilder();
    report.AppendLine(Utils.PrintHeading("PrefMRF: PrefNMF based ORF"));

    // Prediction (timed)
    Utils.StartTimer();
    DataMatrix expectedRatings;
    DataMatrix mostLikelyRatings;

    // Turn the training preference relations into user-wise positions and
    // quantize them onto the levels supplied by the caller.
    var trainPositions = new DataMatrix(PR_train.GetPositionMatrix());
    double lowestLevel = quantizer[0];
    double levelSpan = quantizer[quantizer.Count - 1] - quantizer[0];
    trainPositions.Quantization(lowestLevel, levelSpan, quantizer);

    var model = new ORF();
    model.PredictRatings(
        trainPositions,
        R_unknown,
        StrongSimilarityIndicatorsByItemPref,
        distributionByUserItem,
        regularization,
        learnRate,
        maxEpoch,
        quantizer.Count,
        out expectedRatings,
        out mostLikelyRatings);
    report.AppendLine(Utils.StopTimer());

    // Evaluation: only the expectation-based predictions are ranked.
    var rankedItemsByUser = ItemRecommendationCore.GetTopNItemsByUser(expectedRatings, topN);

    for (int cutoff = 1; cutoff <= topN; ++cutoff)
    {
        string ncdgText = NCDG.Evaluate(RelevantItemsByUser, rankedItemsByUser, cutoff).ToString("0.0000");
        report.AppendLine(Utils.PrintValue("NCDG@" + cutoff, ncdgText));
    }

    for (int cutoff = 1; cutoff <= topN; ++cutoff)
    {
        string mapText = MAP.Evaluate(RelevantItemsByUser, rankedItemsByUser, cutoff).ToString("0.0000");
        report.AppendLine(Utils.PrintValue("MAP@" + cutoff, mapText));
    }

    return report.ToString();
}
/// <summary>
/// Runs the ORF model on top of the NMF-based OMF distribution.
/// Requires the distribution file named by GetDataFileName("RatingOMF_"); if it
/// is missing, nothing is computed and an abort message is returned.
/// Reports RMSE on the expectation-based predictions, MAE on the most-likely
/// predictions, and optionally NCDG@n / MAP@n on the expectation-based ranking.
/// </summary>
/// <param name="regularization">Passed through to ORF.PredictRatings.</param>
/// <param name="learnRate">Passed through to ORF.PredictRatings.</param>
/// <param name="maxEpoch">Passed through to ORF.PredictRatings.</param>
/// <param name="quantizer">Quantization levels; only its Count is passed to ORF here.</param>
/// <param name="topN">Largest cut-off for the top-N metrics; 0 skips top-N evaluation.</param>
/// <returns>The accumulated log text, or "Abort, Run OMF first." when the distribution file is absent.</returns>
public string RunNMFbasedORF(double regularization, double learnRate, int maxEpoch, List<double> quantizer, int topN = 0)
{
    // Load OMFDistribution from file
    Dictionary<Tuple<int, int>, List<double>> OMFDistributionByUserItem;
    if (File.Exists(GetDataFileName("RatingOMF_")))
    {
        OMFDistributionByUserItem = Utils.IO<Dictionary<Tuple<int, int>, List<double>>>.LoadObject(GetDataFileName("RatingOMF_"));
    }
    else
    {
        return "Abort, Run OMF first.";
    }

    if (!ReadyForNumerical)
    {
        GetReadyForNumerical();
    }

    StringBuilder log = new StringBuilder();
    log.AppendLine(Utils.PrintHeading("NMF based ORF"));

    // Prediction
    Utils.StartTimer();
    DataMatrix R_predicted_expectations;
    DataMatrix R_predicted_mostlikely;
    ORF orf = new ORF();
    orf.PredictRatings(R_train, R_unknown, StrongSimilarityIndicatorsByItemRating,
        OMFDistributionByUserItem, regularization, learnRate, maxEpoch,
        quantizer.Count, out R_predicted_expectations, out R_predicted_mostlikely);
    log.AppendLine(Utils.StopTimer());

    // Numerical Evaluation
    log.AppendLine(Utils.PrintValue("RMSE", RMSE.Evaluate(R_test, R_predicted_expectations).ToString("0.0000")));
    // The "MAE" line must use the MAE metric; it previously called RMSE.Evaluate
    // and therefore reported an RMSE value under the MAE label.
    log.AppendLine(Utils.PrintValue("MAE", MAE.Evaluate(R_test, R_predicted_mostlikely).ToString("0.0000")));

    // Top-N Evaluation
    if (topN != 0)
    {
        var topNItemsByUser_expectations = ItemRecommendationCore.GetTopNItemsByUser(R_predicted_expectations, topN);
        for (int n = 1; n <= topN; n++)
        {
            log.AppendLine(Utils.PrintValue("NCDG@" + n, NCDG.Evaluate(RelevantItemsByUser, topNItemsByUser_expectations, n).ToString("0.0000")));
        }
        for (int n = 1; n <= topN; n++)
        {
            log.AppendLine(Utils.PrintValue("MAP@" + n, MAP.Evaluate(RelevantItemsByUser, topNItemsByUser_expectations, n).ToString("0.0000")));
        }
    }

    return log.ToString();
}
/// <summary>
/// Runs rating-based Non-negative Matrix Factorization: predicts the cells of
/// R_unknown from R_train, then reports RMSE/MAE against R_test and, when
/// requested, NCDG@n and MAP@n for n = 1..topN.
/// </summary>
/// <param name="maxEpoch">Passed through to NMF.PredictRatings.</param>
/// <param name="learnRate">Passed through to NMF.PredictRatings.</param>
/// <param name="regularization">Passed through to NMF.PredictRatings.</param>
/// <param name="factorCount">Passed through to NMF.PredictRatings.</param>
/// <param name="topN">Largest cut-off for the top-N metrics; 0 skips top-N evaluation.</param>
/// <returns>The accumulated log text of the run.</returns>
public string RunNMF(int maxEpoch, double learnRate, double regularization, int factorCount, int topN = 0)
{
    if (!ReadyForNumerical)
    {
        GetReadyForNumerical();
    }

    var report = new StringBuilder();
    report.AppendLine(Utils.PrintHeading("NMF"));

    // Timed prediction step.
    Utils.StartTimer();
    DataMatrix predictions = NMF.PredictRatings(R_train, R_unknown, maxEpoch, learnRate, regularization, factorCount);
    report.AppendLine(Utils.StopTimer());

    // Rating-accuracy metrics.
    string rmseText = RMSE.Evaluate(R_test, predictions).ToString("0.0000");
    report.AppendLine(Utils.PrintValue("RMSE", rmseText));
    string maeText = MAE.Evaluate(R_test, predictions).ToString("0.0000");
    report.AppendLine(Utils.PrintValue("MAE", maeText));

    // Ranking metrics, only when a cut-off was requested.
    if (topN != 0)
    {
        var rankedItemsByUser = ItemRecommendationCore.GetTopNItemsByUser(predictions, topN);

        for (int cutoff = 1; cutoff <= topN; ++cutoff)
        {
            string ncdgText = NCDG.Evaluate(RelevantItemsByUser, rankedItemsByUser, cutoff).ToString("0.0000");
            report.AppendLine(Utils.PrintValue("NCDG@" + cutoff, ncdgText));
        }

        for (int cutoff = 1; cutoff <= topN; ++cutoff)
        {
            string mapText = MAP.Evaluate(RelevantItemsByUser, rankedItemsByUser, cutoff).ToString("0.0000");
            report.AppendLine(Utils.PrintValue("MAP@" + cutoff, mapText));
        }
    }

    return report.ToString();
}
/// <summary>
/// Checks ItemRecommendationCore.GetTopNItemsByUser with N = 2 on the matrix
/// returned by GetSampleRatingMatrix, using Debug.Assert for the expectations.
/// </summary>
public void GetTopNItemsByUser()
{
    // Arrange
    // Content of the sample matrix as noted by the original author:
    //   5 3 0 1
    //   4 0 0 1
    //   1 1 0 5
    //   1 0 0 4
    //   0 1 5 4
    DataMatrix sampleRatings = GetSampleRatingMatrix();

    // Act
    Dictionary<int, List<int>> top2ByUser = ItemRecommendationCore.GetTopNItemsByUser(sampleRatings, 2);

    // Assert: user 0 gets exactly two items, items 0 then 1.
    Debug.Assert(top2ByUser[0].Count == 2);
    Debug.Assert(top2ByUser[0][0] == 0);
    Debug.Assert(top2ByUser[0][1] == 1);

    // Assert: user 1 gets items 0 then 3.
    Debug.Assert(top2ByUser[1][0] == 0);
    Debug.Assert(top2ByUser[1][1] == 3);

    // Assert: user 4 gets items 2 then 3.
    Debug.Assert(top2ByUser[4][0] == 2);
    Debug.Assert(top2ByUser[4][1] == 3);
}
/// <summary>
/// Baseline recommender: every cell of R_unknown is scored with the value that
/// R_train.GetItemMeans() holds for its item, so all users are offered the
/// items with the highest mean rating. Evaluates NCDG@n and MAP@n for n = 1..topN.
/// </summary>
/// <param name="topN">Largest cut-off used for the top-N metrics.</param>
/// <returns>The accumulated log text of the run.</returns>
public string RunMostPopular(int topN)
{
    if (!ReadyForNumerical)
    {
        GetReadyForNumerical();
    }

    var report = new StringBuilder();
    report.AppendLine(Utils.PrintHeading("Most popular"));

    // Timed prediction step: fill each unknown cell with its item's mean.
    Utils.StartTimer();
    var itemMeans = R_train.GetItemMeans();
    var scores = new DataMatrix(R_unknown.UserCount, R_unknown.ItemCount);
    foreach (var cell in R_unknown.Matrix.EnumerateIndexed(Zeros.AllowSkip))
    {
        // Item1 is the user index, Item2 the item index of the cell.
        int user = cell.Item1;
        int item = cell.Item2;
        scores[user, item] = itemMeans[item];
    }
    var rankedItemsByUser = ItemRecommendationCore.GetTopNItemsByUser(scores, topN);
    report.AppendLine(Utils.StopTimer());

    // Ranking metrics.
    for (int cutoff = 1; cutoff <= topN; ++cutoff)
    {
        string ncdgText = NCDG.Evaluate(RelevantItemsByUser, rankedItemsByUser, cutoff).ToString("0.0000");
        report.AppendLine(Utils.PrintValue("NCDG@" + cutoff, ncdgText));
    }

    for (int cutoff = 1; cutoff <= topN; ++cutoff)
    {
        string mapText = MAP.Evaluate(RelevantItemsByUser, rankedItemsByUser, cutoff).ToString("0.0000");
        report.AppendLine(Utils.PrintValue("MAP@" + cutoff, mapText));
    }

    return report.ToString();
}
/// <summary>
/// Runs the rating-based user KNN recommender: predicts the cells of R_unknown
/// from R_train using UserSimilaritiesOfRating and MaxCountOfNeighbors, then
/// reports RMSE/MAE against R_test and, when requested, NCDG@n and MAP@n.
/// </summary>
/// <param name="topN">Largest cut-off for the top-N metrics; 0 skips top-N evaluation.</param>
/// <returns>The accumulated log text of the run, including both NCDG and MAP lines.</returns>
public string RunUserKNN(int topN = 0)
{
    if (!ReadyForNumerical)
    {
        GetReadyForNumerical();
    }

    StringBuilder log = new StringBuilder();
    log.AppendLine(Utils.PrintHeading("UserKNN"));

    // Prediction
    Utils.StartTimer();
    DataMatrix R_predicted = Numerical.UserKNN.PredictRatings(R_train, R_unknown, UserSimilaritiesOfRating, MaxCountOfNeighbors);
    log.AppendLine(Utils.StopTimer());

    // Numerical Evaluation
    log.AppendLine(Utils.PrintValue("RMSE", RMSE.Evaluate(R_test, R_predicted).ToString("0.0000")));
    log.AppendLine(Utils.PrintValue("MAE", MAE.Evaluate(R_test, R_predicted).ToString("0.0000")));

    // TopN Evaluation
    if (topN != 0)
    {
        var topNItemsByUser = ItemRecommendationCore.GetTopNItemsByUser(R_predicted, topN);
        for (int n = 1; n <= topN; n++)
        {
            // The NCDG line used to be computed and then dropped; append it to
            // the log like the MAP lines below so the returned report is complete.
            log.AppendLine(Utils.PrintValue("NCDG@" + n, NCDG.Evaluate(RelevantItemsByUser, topNItemsByUser, n).ToString("0.0000")));
        }
        for (int n = 1; n <= topN; n++)
        {
            log.AppendLine(Utils.PrintValue("MAP@" + n, MAP.Evaluate(RelevantItemsByUser, topNItemsByUser, n).ToString("0.0000")));
        }
    }

    return log.ToString();
}
/// <summary>
/// Runs Ordinal Matrix Factorization (OMF) with PrefNMF as the scorer.
/// PrefNMF first predicts preference relations for every user's train and
/// unknown items, OMF then turns those scores into predictions for the unknown
/// cells, and the result is evaluated with NCDG@n and MAP@n for n = 1..topN.
/// The OMF distribution is saved to GetDataFileName("PrefOMF_") unless that
/// file already exists.
/// </summary>
/// <param name="maxEpoch">Passed through to PrefNMF.PredictPrefRelations.</param>
/// <param name="learnRate">Passed through to PrefNMF.PredictPrefRelations.</param>
/// <param name="regularizationOfUser">Passed through to PrefNMF.PredictPrefRelations.</param>
/// <param name="regularizationOfItem">Passed through to PrefNMF.PredictPrefRelations.</param>
/// <param name="factorCount">Passed through to PrefNMF.PredictPrefRelations.</param>
/// <param name="quantizer">Quantization levels; used by PrefNMF, by the quantization of the training positions, and by OMF.</param>
/// <param name="topN">Largest cut-off used for the top-N metrics.</param>
/// <returns>The accumulated log text of the run.</returns>
public string RunPrefNMFbasedOMF(int maxEpoch, double learnRate, double regularizationOfUser, double regularizationOfItem, int factorCount, List<double> quantizer, int topN)
{
    if (!ReadyForOrdinal)
    {
        GetReadyForOrdinal();
    }

    StringBuilder log = new StringBuilder();
    log.AppendLine(Utils.PrintHeading("PrefNMF based OMF"));

    // =============PrefNMF prediction on Train+Unknown============
    // The scorer needs, per user, the items of both the train and the unknown
    // set. An earlier approach merged both into one DataMatrix (R_all) and
    // built discrete preference relations from it; per the original author
    // that was far too slow, hence the per-user item lists used here.
    Dictionary<int, List<int>> ItemsByUser_train = R_train.GetItemsByUser();
    Dictionary<int, List<int>> ItemsByUser_unknown = R_unknown.GetItemsByUser();

    // Build fresh lists instead of copying the dictionary: the dictionary copy
    // constructor shares the List instances, so appending to them would also
    // modify the lists held by ItemsByUser_train.
    // NOTE(review): whether GetItemsByUser returns cached or newly built lists
    // is not visible here; copying is safe either way.
    Dictionary<int, List<int>> PR_unknown = new Dictionary<int, List<int>>(ItemsByUser_train.Count);
    foreach (KeyValuePair<int, List<int>> trainEntry in ItemsByUser_train)
    {
        List<int> itemsOfUser = new List<int>(trainEntry.Value);

        // A user with training items but no unknown items simply contributes
        // the training items (previously this raised KeyNotFoundException).
        List<int> unknownItemsOfUser;
        if (ItemsByUser_unknown.TryGetValue(trainEntry.Key, out unknownItemsOfUser))
        {
            itemsOfUser.AddRange(unknownItemsOfUser);
        }

        PR_unknown[trainEntry.Key] = itemsOfUser;
    }

    Utils.StartTimer();
    SparseMatrix PR_predicted = PrefNMF.PredictPrefRelations(PR_train, PR_unknown,
        maxEpoch, learnRate, regularizationOfUser, regularizationOfItem, factorCount, quantizer);

    // Original author's notes: both the predicted and the train data need to be
    // quantized, otherwise OMF won't accept them; PR_train itself is already in
    // quantized form. The quantizer is handed to PredictPrefRelations above and
    // the training positions are quantized explicitly below.
    DataMatrix R_predictedByPrefNMF = new DataMatrix(PR_predicted);

    DataMatrix R_train_positions = new DataMatrix(PR_train.GetPositionMatrix());
    R_train_positions.Quantization(quantizer[0], quantizer[quantizer.Count - 1] - quantizer[0], quantizer);
    log.AppendLine(Utils.StopTimer());

    // =============OMF prediction on Train+Unknown============
    log.AppendLine(Utils.PrintHeading("Ordinal Matrix Factorization with PrefNMF as scorer"));
    Utils.StartTimer();
    Dictionary<Tuple<int, int>, List<double>> OMFDistributionByUserItem;
    DataMatrix R_predicted;
    log.AppendLine(OMF.PredictRatings(R_train_positions.Matrix, R_unknown.Matrix, R_predictedByPrefNMF.Matrix,
        quantizer, out R_predicted, out OMFDistributionByUserItem));
    log.AppendLine(Utils.StopTimer());

    // TopN Evaluation
    var topNItemsByUser = ItemRecommendationCore.GetTopNItemsByUser(R_predicted, topN);
    for (int n = 1; n <= topN; n++)
    {
        log.AppendLine(Utils.PrintValue("NCDG@" + n, NCDG.Evaluate(RelevantItemsByUser, topNItemsByUser, n).ToString("0.0000")));
    }
    for (int n = 1; n <= topN; n++)
    {
        log.AppendLine(Utils.PrintValue("MAP@" + n, MAP.Evaluate(RelevantItemsByUser, topNItemsByUser, n).ToString("0.0000")));
    }

    // Save OMFDistribution to file, but never overwrite an existing one.
    if (!File.Exists(GetDataFileName("PrefOMF_")))
    {
        Utils.IO<Dictionary<Tuple<int, int>, List<double>>>.SaveObject(OMFDistributionByUserItem, GetDataFileName("PrefOMF_"));
    }

    return log.ToString();
}
/// <summary>
/// Prepares the state used by the rating-based (numerical) experiments:
/// loads and splits the data set into R_train / R_test, derives R_unknown and
/// RelevantItemsByUser from R_test, and then either loads the rating-based
/// similarity data from the "USR", "ISR" and "SSIIR" data files (when all three
/// exist) or computes it from R_train. Sets ReadyForNumerical when done.
/// </summary>
/// <param name="saveLoadedData">When true, freshly computed similarity data is saved to the "USR", "ISR" and "SSIIR" data files.</param>
/// <returns>"Is ready." when preparation was already done; otherwise the log text of the preparation, including the similarity load/compute steps.</returns>
public string GetReadyForNumerical(bool saveLoadedData = true)
{
    if (ReadyForNumerical)
    {
        return "Is ready.";
    }

    StringBuilder log = new StringBuilder();
    Utils.StartTimer();

    log.AppendLine(Utils.PrintHeading("Create R_train/R_test sets from " + DataSetFile));
    Utils.LoadMovieLensSplitByCount(DataSetFile, out R_train, out R_test,
        MinCountOfRatings, MaxCountOfRatings, CountOfRatingsForTrain, ShuffleData, Seed);

    Console.WriteLine(R_train.DatasetBrief("Train set"));
    Console.WriteLine(R_test.DatasetBrief("Test set"));
    log.AppendLine(R_train.DatasetBrief("Train set"));
    log.AppendLine(R_test.DatasetBrief("Test set"));

    R_unknown = R_test.IndexesOfNonZeroElements();

    log.AppendLine(Utils.PrintValue("Relevant item criteria", RelevantItemCriteria.ToString("0.0")));
    RelevantItemsByUser = ItemRecommendationCore.GetRelevantItemsByUser(R_test, RelevantItemCriteria);

    // Average() throws on an empty sequence; report 0 when no user has an entry.
    double meanRelevantItemCount = RelevantItemsByUser.Any()
        ? RelevantItemsByUser.Average(k => k.Value.Count)
        : 0.0;
    log.AppendLine(Utils.PrintValue("Mean # of relevant items per user", meanRelevantItemCount.ToString("0")));
    log.AppendLine(Utils.StopTimer());

    #region Prepare similarity data
    // The headings and timings of this section used to be produced and then
    // discarded; they are now appended to the log like every other step.
    if (File.Exists(GetDataFileName("USR"))
        && File.Exists(GetDataFileName("ISR"))
        && File.Exists(GetDataFileName("SSIIR")))
    {
        Utils.StartTimer();
        log.AppendLine(Utils.PrintHeading("Load user-user similarities (rating based)"));
        UserSimilaritiesOfRating = Utils.IO<SimilarityData>.LoadObject(GetDataFileName("USR"));
        log.AppendLine(Utils.StopTimer());

        Utils.StartTimer();
        log.AppendLine(Utils.PrintHeading("Load item-item similarities (rating based)"));
        ItemSimilaritiesOfRating = Utils.IO<SimilarityData>.LoadObject(GetDataFileName("ISR"));
        log.AppendLine(Utils.StopTimer());

        Utils.StartTimer();
        log.AppendLine(Utils.PrintHeading("Load item-item strong similarity indicators (rating based)"));
        StrongSimilarityIndicatorsByItemRating = Utils.IO<HashSet<Tuple<int, int>>>.LoadObject(GetDataFileName("SSIIR"));
        log.AppendLine(Utils.StopTimer());
    }
    else
    {
        // At least one file is missing: recompute everything from R_train.
        Utils.StartTimer();
        log.AppendLine(Utils.PrintHeading("Compute user-user similarities (rating based)"));
        Metric.GetPearsonOfRows(R_train, MaxCountOfNeighbors, StrongSimilarityThreshold, out UserSimilaritiesOfRating);
        if (saveLoadedData)
        {
            Utils.IO<SimilarityData>.SaveObject(UserSimilaritiesOfRating, GetDataFileName("USR"));
        }
        log.AppendLine(Utils.StopTimer());

        Utils.StartTimer();
        log.AppendLine(Utils.PrintHeading("Compute item-item similarities (rating based)"));
        Metric.GetPearsonOfColumns(R_train, MaxCountOfNeighbors, StrongSimilarityThreshold,
            out ItemSimilaritiesOfRating, out StrongSimilarityIndicatorsByItemRating);
        if (saveLoadedData)
        {
            Utils.IO<SimilarityData>.SaveObject(ItemSimilaritiesOfRating, GetDataFileName("ISR"));
            Utils.IO<HashSet<Tuple<int, int>>>.SaveObject(StrongSimilarityIndicatorsByItemRating, GetDataFileName("SSIIR"));
        }
        log.AppendLine(Utils.StopTimer());
    }
    #endregion

    ReadyForNumerical = true;

    return log.ToString();
}