/// <summary>
/// Runs the NMF-based OMF pipeline: NMF produces raw score predictions, OMF turns
/// them into ordinal rating predictions, and both numerical (RMSE/MAE) and optional
/// top-N ranking metrics are evaluated. Returns the accumulated report text.
/// </summary>
public string RunNMFbasedOMF(int maxEpoch, double learnRate, double regularization, int factorCount, List<double> quantizer, int topN = 0)
{
    if (!ReadyForNumerical) { GetReadyForNumerical(); }

    StringBuilder report = new StringBuilder();
    report.AppendLine(Utils.PrintHeading("NMF based OMF"));

    // Collect the index positions of every rating, train and test alike,
    // so the NMF scorer predicts values for all of them.
    DataMatrix allRatingIndexes = new DataMatrix(R_unknown.UserCount, R_unknown.ItemCount);
    allRatingIndexes.MergeNonOverlap(R_unknown);
    allRatingIndexes.MergeNonOverlap(R_train.IndexesOfNonZeroElements());

    // Stage 1: NMF scoring.
    Utils.StartTimer();
    DataMatrix nmfScores = NMF.PredictRatings(R_train, allRatingIndexes, maxEpoch, learnRate, regularization, factorCount);
    report.AppendLine(Utils.StopTimer());

    // Stage 2: OMF prediction using the NMF scores.
    report.AppendLine(Utils.PrintHeading("Ordinal Matrix Factorization with NMF as scorer"));
    Utils.StartTimer();
    Dictionary<Tuple<int, int>, List<double>> ratingDistributions;
    DataMatrix predictedRatings;
    report.AppendLine(OMF.PredictRatings(R_train.Matrix, R_unknown.Matrix, nmfScores.Matrix, quantizer, out predictedRatings, out ratingDistributions));
    report.AppendLine(Utils.StopTimer());

    // Numerical accuracy on the held-out test ratings.
    report.AppendLine(Utils.PrintValue("RMSE", RMSE.Evaluate(R_test, predictedRatings).ToString("0.0000")));
    report.AppendLine(Utils.PrintValue("MAE", MAE.Evaluate(R_test, predictedRatings).ToString("0.0000")));

    // Optional ranking evaluation at cutoffs 1..topN.
    if (topN != 0)
    {
        var recommendations = ItemRecommendationCore.GetTopNItemsByUser(predictedRatings, topN);
        for (int n = 1; n <= topN; n++)
        {
            report.AppendLine(Utils.PrintValue("NCDG@" + n, NCDG.Evaluate(RelevantItemsByUser, recommendations, n).ToString("0.0000")));
        }
        for (int n = 1; n <= topN; n++)
        {
            report.AppendLine(Utils.PrintValue("MAP@" + n, MAP.Evaluate(RelevantItemsByUser, recommendations, n).ToString("0.0000")));
        }
    }

    // Persist the OMF distributions unless a cached copy is already on disk.
    if (!File.Exists(GetDataFileName("RatingOMF_")))
    {
        Utils.IO<Dictionary<Tuple<int, int>, List<double>>>.SaveObject(ratingDistributions, GetDataFileName("RatingOMF_"));
    }

    return report.ToString();
}
/// <summary>
/// Prepares the train/test split and the rating-based similarity data required by
/// the numerical experiments. Similarity data is loaded from cache files when all
/// of them exist; otherwise it is computed (and optionally saved). Idempotent:
/// returns immediately once the instance is ready. Returns the log text.
/// </summary>
public string GetReadyForNumerical(bool saveLoadedData = true)
{
    if (ReadyForNumerical) { return "Is ready."; }

    StringBuilder report = new StringBuilder();

    // Load and split the data set, reporting summaries of both halves.
    Utils.StartTimer();
    report.AppendLine(Utils.PrintHeading("Create R_train/R_test sets from " + DataSetFile));
    Utils.LoadMovieLensSplitByCount(DataSetFile, out R_train, out R_test, MinCountOfRatings, MaxCountOfRatings, CountOfRatingsForTrain, ShuffleData, Seed);
    Console.WriteLine(R_train.DatasetBrief("Train set"));
    Console.WriteLine(R_test.DatasetBrief("Test set"));
    report.AppendLine(R_train.DatasetBrief("Train set"));
    report.AppendLine(R_test.DatasetBrief("Test set"));

    // Positions to predict are exactly the test-set rating positions.
    R_unknown = R_test.IndexesOfNonZeroElements();

    report.AppendLine(Utils.PrintValue("Relevant item criteria", RelevantItemCriteria.ToString("0.0")));
    RelevantItemsByUser = ItemRecommendationCore.GetRelevantItemsByUser(R_test, RelevantItemCriteria);
    report.AppendLine(Utils.PrintValue("Mean # of relevant items per user", RelevantItemsByUser.Average(k => k.Value.Count).ToString("0")));
    report.AppendLine(Utils.StopTimer());

    #region Prepare similarity data
    bool cacheAvailable = File.Exists(GetDataFileName("USR"))
        && File.Exists(GetDataFileName("ISR"))
        && File.Exists(GetDataFileName("SSIIR"));

    if (cacheAvailable)
    {
        // All three cache files are present — deserialize instead of recomputing.
        Utils.StartTimer();
        Utils.PrintHeading("Load user-user similarities (rating based)");
        UserSimilaritiesOfRating = Utils.IO<SimilarityData>.LoadObject(GetDataFileName("USR"));
        Utils.StopTimer();

        Utils.StartTimer();
        Utils.PrintHeading("Load item-item similarities (rating based)");
        ItemSimilaritiesOfRating = Utils.IO<SimilarityData>.LoadObject(GetDataFileName("ISR"));
        Utils.StopTimer();

        Utils.StartTimer();
        Utils.PrintHeading("Load item-item strong similarity indicators (rating based)");
        StrongSimilarityIndicatorsByItemRating = Utils.IO<HashSet<Tuple<int, int>>>.LoadObject(GetDataFileName("SSIIR"));
        Utils.StopTimer();
    }
    else
    {
        // Compute similarities from scratch, optionally persisting them for next time.
        Utils.StartTimer();
        Utils.PrintHeading("Compute user-user similarities (rating based)");
        Metric.GetPearsonOfRows(R_train, MaxCountOfNeighbors, StrongSimilarityThreshold, out UserSimilaritiesOfRating);
        if (saveLoadedData)
        {
            Utils.IO<SimilarityData>.SaveObject(UserSimilaritiesOfRating, GetDataFileName("USR"));
        }
        Utils.StopTimer();

        Utils.StartTimer();
        Utils.PrintHeading("Compute item-item similarities (rating based)");
        Metric.GetPearsonOfColumns(R_train, MaxCountOfNeighbors, StrongSimilarityThreshold, out ItemSimilaritiesOfRating, out StrongSimilarityIndicatorsByItemRating);
        if (saveLoadedData)
        {
            Utils.IO<SimilarityData>.SaveObject(ItemSimilaritiesOfRating, GetDataFileName("ISR"));
            Utils.IO<HashSet<Tuple<int, int>>>.SaveObject(StrongSimilarityIndicatorsByItemRating, GetDataFileName("SSIIR"));
        }
        Utils.StopTimer();
    }
    #endregion

    ReadyForNumerical = true;
    return report.ToString();
}
/// <summary>
/// Learns a biased matrix factorization (global mean + P * Q) from the training
/// ratings via SGD, then predicts the ratings at the positions marked in
/// <paramref name="R_unknown"/>, clipped to the configured rating range.
/// </summary>
/// <param name="R_train">Observed training ratings (sparse user-by-item matrix).</param>
/// <param name="R_unknown">Indicator matrix of the user/item positions to predict.</param>
/// <param name="maxEpoch">Maximum number of SGD passes over the training ratings.</param>
/// <param name="learnRate">SGD step size.</param>
/// <param name="regularization">L2 penalty weight on the latent factor vectors.</param>
/// <param name="factorCount">Number of latent factors per user/item.</param>
/// <returns>Predicted ratings for the requested positions.</returns>
public static DataMatrix PredictRatings(DataMatrix R_train, DataMatrix R_unknown, int maxEpoch, double learnRate, double regularization, int factorCount)
{
    int userCount = R_train.UserCount;
    int itemCount = R_train.ItemCount;
    double meanOfGlobal = R_train.GetGlobalMean();

    // 0/1 mask of the training positions, used when evaluating convergence.
    DataMatrix R_train_unknown = R_train.IndexesOfNonZeroElements();

    // User latent vectors with the default seed; item latent vectors with a
    // different seed so P and Q do not start identical.
    Matrix<double> P = Utils.CreateRandomMatrixFromNormal(userCount, factorCount, 0, 0.1, Config.Seed);
    Matrix<double> Q = Utils.CreateRandomMatrixFromNormal(factorCount, itemCount, 0, 0.1, Config.Seed + 1);

    // Objective is evaluated roughly every 10% of the epochs (hoisted out of the loop).
    int evaluationInterval = (int)Math.Ceiling(maxEpoch * 0.1);

    // SGD over all observed ratings.
    double e_prev = double.MaxValue;
    for (int epoch = 0; epoch < maxEpoch; ++epoch)
    {
        foreach (Tuple<int, int, double> element in R_train.Ratings)
        {
            int indexOfUser = element.Item1;
            int indexOfItem = element.Item2;
            double rating = element.Item3;

            // Prediction error of the biased model: r - (mean + p_u . q_i).
            double e_ij = rating - (meanOfGlobal + P.Row(indexOfUser).DotProduct(Q.Column(indexOfItem)));

            // Simultaneous update: both gradients use the pre-update vectors.
            Vector<double> P_u = P.Row(indexOfUser);
            Vector<double> Q_i = Q.Column(indexOfItem);
            P.SetRow(indexOfUser, P_u + (Q_i.Multiply(e_ij) - P_u.Multiply(regularization)).Multiply(learnRate));
            Q.SetColumn(indexOfItem, Q_i + (P_u.Multiply(e_ij) - Q_i.Multiply(regularization)).Multiply(learnRate));
        }

        // Periodically compute the regularized objective to test convergence.
        // NOTE(review): the `== 4` offset looks arbitrary — confirm it was not
        // intended to be `== 0` (i.e. exactly every evaluationInterval epochs).
        if (epoch == 0 || epoch == maxEpoch - 1 || epoch % evaluationInterval == 4)
        {
            // Fix: include the global-mean bias so the objective measures the
            // model actually being trained (previously only P*Q was compared
            // against the ratings, inconsistent with the e_ij error above).
            Matrix<double> predictedMatrix = R_train_unknown.PointwiseMultiply(P.Multiply(Q).Add(meanOfGlobal));
            SparseMatrix correctMatrix = R_train.Matrix;
            double squaredError = (correctMatrix - predictedMatrix).SquaredSum();
            double regularizationPenalty = regularization * (P.SquaredSum() + Q.SquaredSum());
            double objective = squaredError + regularizationPenalty;

            // Stop learning once the improvement falls below the threshold.
            if (e_prev - objective < 0.001)
            {
                Console.WriteLine("Improvement less than 0.001, learning stopped.");
                break;
            }
            e_prev = objective;
            Utils.PrintEpoch("Epoch", epoch, maxEpoch, "Objective cost", objective);
        }
    }

    // Predict each requested position and clip into the valid rating range.
    SparseMatrix R_predicted = new SparseMatrix(R_unknown.UserCount, R_unknown.ItemCount);
    foreach (var element in R_unknown.Matrix.EnumerateIndexed(Zeros.AllowSkip))
    {
        int indexOfUser = element.Item1;
        int indexOfItem = element.Item2;
        double r_predicted = meanOfGlobal + P.Row(indexOfUser) * Q.Column(indexOfItem);
        if (r_predicted > Config.Ratings.MaxRating) { r_predicted = Config.Ratings.MaxRating; }
        if (r_predicted < Config.Ratings.MinRating) { r_predicted = Config.Ratings.MinRating; }
        R_predicted[indexOfUser, indexOfItem] = r_predicted;
    }
    return new DataMatrix(R_predicted);
}