Beispiel #1
0
        public string RunNMFbasedOMF(int maxEpoch, double learnRate, double regularization, int factorCount,
                                     List <double> quantizer, int topN = 0)
        {
            if (!ReadyForNumerical)
            {
                GetReadyForNumerical();
            }
            StringBuilder log = new StringBuilder();

            log.AppendLine(Utils.PrintHeading("NMF based OMF"));

            // NMF Prediction
            // Get ratings from scorer, for both train and test
            // R_all contains indexes of all ratings both train and test
            DataMatrix R_all = new DataMatrix(R_unknown.UserCount, R_unknown.ItemCount);

            R_all.MergeNonOverlap(R_unknown);
            R_all.MergeNonOverlap(R_train.IndexesOfNonZeroElements());
            Utils.StartTimer();
            DataMatrix R_predictedByNMF = NMF.PredictRatings(R_train, R_all, maxEpoch,
                                                             learnRate, regularization, factorCount);

            log.AppendLine(Utils.StopTimer());

            // OMF Prediction
            log.AppendLine(Utils.PrintHeading("Ordinal Matrix Factorization with NMF as scorer"));
            Utils.StartTimer();
            Dictionary <Tuple <int, int>, List <double> > OMFDistributionByUserItem;
            DataMatrix R_predicted;

            log.AppendLine(OMF.PredictRatings(R_train.Matrix, R_unknown.Matrix, R_predictedByNMF.Matrix,
                                              quantizer, out R_predicted, out OMFDistributionByUserItem));
            log.AppendLine(Utils.StopTimer());

            // Numerical Evaluation
            log.AppendLine(Utils.PrintValue("RMSE", RMSE.Evaluate(R_test, R_predicted).ToString("0.0000")));
            log.AppendLine(Utils.PrintValue("MAE", MAE.Evaluate(R_test, R_predicted).ToString("0.0000")));

            // TopN Evaluation
            if (topN != 0)
            {
                var topNItemsByUser = ItemRecommendationCore.GetTopNItemsByUser(R_predicted, topN);
                for (int n = 1; n <= topN; n++)
                {
                    log.AppendLine(Utils.PrintValue("NCDG@" + n, NCDG.Evaluate(RelevantItemsByUser, topNItemsByUser, n).ToString("0.0000")));
                }
                for (int n = 1; n <= topN; n++)
                {
                    log.AppendLine(Utils.PrintValue("MAP@" + n, MAP.Evaluate(RelevantItemsByUser, topNItemsByUser, n).ToString("0.0000")));
                }
            }

            // Save OMFDistribution to file
            if (!File.Exists(GetDataFileName("RatingOMF_")))
            {
                Utils.IO <Dictionary <Tuple <int, int>, List <double> > > .SaveObject(OMFDistributionByUserItem, GetDataFileName("RatingOMF_"));
            }

            return(log.ToString());
        }
Beispiel #2
0
        public string GetReadyForNumerical(bool saveLoadedData = true)
        {
            if (ReadyForNumerical)
            {
                return("Is ready.");
            }

            StringBuilder log = new StringBuilder();

            Utils.StartTimer();

            log.AppendLine(Utils.PrintHeading("Create R_train/R_test sets from " + DataSetFile));
            Utils.LoadMovieLensSplitByCount(DataSetFile, out R_train,
                                            out R_test, MinCountOfRatings, MaxCountOfRatings, CountOfRatingsForTrain, ShuffleData, Seed);

            Console.WriteLine(R_train.DatasetBrief("Train set"));
            Console.WriteLine(R_test.DatasetBrief("Test set"));
            log.AppendLine(R_train.DatasetBrief("Train set"));
            log.AppendLine(R_test.DatasetBrief("Test set"));

            R_unknown = R_test.IndexesOfNonZeroElements();

            log.AppendLine(Utils.PrintValue("Relevant item criteria", RelevantItemCriteria.ToString("0.0")));
            RelevantItemsByUser = ItemRecommendationCore.GetRelevantItemsByUser(R_test, RelevantItemCriteria);
            log.AppendLine(Utils.PrintValue("Mean # of relevant items per user",
                                            RelevantItemsByUser.Average(k => k.Value.Count).ToString("0")));
            log.AppendLine(Utils.StopTimer());

            #region Prepare similarity data
            if (File.Exists(GetDataFileName("USR")) &&
                File.Exists(GetDataFileName("ISR")) &&
                File.Exists(GetDataFileName("SSIIR")))
            {
                Utils.StartTimer();
                Utils.PrintHeading("Load user-user similarities (rating based)");
                UserSimilaritiesOfRating = Utils.IO <SimilarityData> .LoadObject(GetDataFileName("USR"));

                Utils.StopTimer();

                Utils.StartTimer();
                Utils.PrintHeading("Load item-item similarities (rating based)");
                ItemSimilaritiesOfRating = Utils.IO <SimilarityData> .LoadObject(GetDataFileName("ISR"));

                Utils.StopTimer();

                Utils.StartTimer();
                Utils.PrintHeading("Load item-item strong similarity indicators (rating based)");
                StrongSimilarityIndicatorsByItemRating = Utils.IO <HashSet <Tuple <int, int> > > .LoadObject(GetDataFileName("SSIIR"));

                Utils.StopTimer();
            }
            else
            {
                Utils.StartTimer();
                Utils.PrintHeading("Compute user-user similarities (rating based)");
                Metric.GetPearsonOfRows(R_train, MaxCountOfNeighbors, StrongSimilarityThreshold,
                                        out UserSimilaritiesOfRating);
                if (saveLoadedData)
                {
                    Utils.IO <SimilarityData> .SaveObject(UserSimilaritiesOfRating, GetDataFileName("USR"));
                }
                Utils.StopTimer();

                Utils.StartTimer();
                Utils.PrintHeading("Compute item-item similarities (rating based)");
                Metric.GetPearsonOfColumns(R_train, MaxCountOfNeighbors, StrongSimilarityThreshold,
                                           out ItemSimilaritiesOfRating, out StrongSimilarityIndicatorsByItemRating);
                if (saveLoadedData)
                {
                    Utils.IO <SimilarityData> .SaveObject(ItemSimilaritiesOfRating, GetDataFileName("ISR"));

                    Utils.IO <HashSet <Tuple <int, int> > >
                    .SaveObject(StrongSimilarityIndicatorsByItemRating, GetDataFileName("SSIIR"));
                }
                Utils.StopTimer();
            }
            #endregion

            ReadyForNumerical = true;

            return(log.ToString());
        }
Beispiel #3
0
        public static DataMatrix PredictRatings(DataMatrix R_train, DataMatrix R_unknown,
                                                int maxEpoch, double learnRate, double regularization, int factorCount)
        {
            int        userCount       = R_train.UserCount;
            int        itemCount       = R_train.ItemCount;
            int        ratingCount     = R_train.NonZerosCount;
            double     meanOfGlobal    = R_train.GetGlobalMean();
            DataMatrix R_train_unknown = R_train.IndexesOfNonZeroElements();  // For testing convergence

            // User latent vectors with default seed
            Matrix <double> P = Utils.CreateRandomMatrixFromNormal(userCount, factorCount, 0, 0.1, Config.Seed);
            // Matrix<double> P = Utils.CreateRandomMatrixFromUniform(userCount, factorCount, 0, 0.1, Config.Seed);
            // Item latent vectors with a different seed
            Matrix <double> Q = Utils.CreateRandomMatrixFromNormal(factorCount, itemCount, 0, 0.1, Config.Seed + 1);
            //Matrix<double> Q = Utils.CreateRandomMatrixFromUniform(factorCount, itemCount, 0, 0.1, Config.Seed + 1);

            // SGD
            double e_prev = double.MaxValue;

            for (int epoch = 0; epoch < maxEpoch; ++epoch)
            {
                foreach (Tuple <int, int, double> element in R_train.Ratings)
                {
                    int    indexOfUser = element.Item1;
                    int    indexOfItem = element.Item2;
                    double rating      = element.Item3;

                    double e_ij = rating - (meanOfGlobal + P.Row(indexOfUser).DotProduct(Q.Column(indexOfItem)));

                    // Update feature vectors
                    Vector <double> P_u = P.Row(indexOfUser);
                    Vector <double> Q_i = Q.Column(indexOfItem);

                    Vector <double> P_u_updated = P_u + (Q_i.Multiply(e_ij) - P_u.Multiply(regularization)).Multiply(learnRate);
                    P.SetRow(indexOfUser, P_u_updated);

                    Vector <double> Q_i_updated = Q_i + (P_u.Multiply(e_ij) - Q_i.Multiply(regularization)).Multiply(learnRate);
                    Q.SetColumn(indexOfItem, Q_i_updated);

                    #region Update feature vectors loop version

                    /*
                     * // Update feature vectors
                     * for (int k = 0; k < factorCount; ++k)
                     * {
                     *  double factorOfUser = P[indexOfUser, k];
                     *  double factorOfItem = Q[k, indexOfItem];
                     *
                     *  P[indexOfUser, k] += learnRate * (e_ij * factorOfItem - regularization * factorOfUser);
                     *  Q[k, indexOfItem] += learnRate * (e_ij * factorOfUser - regularization * factorOfItem);
                     * }
                     */
                    #endregion
                }

                // Display the current regularized error see if it converges

                double e_curr = 0;
                if (epoch == 0 || epoch == maxEpoch - 1 || epoch % (int)Math.Ceiling(maxEpoch * 0.1) == 4)
                {
                    Matrix <double> predictedMatrix      = R_train_unknown.PointwiseMultiply(P.Multiply(Q));
                    SparseMatrix    correctMatrix        = R_train.Matrix;
                    double          squaredError         = (correctMatrix - predictedMatrix).SquaredSum();
                    double          regularizationPenaty = regularization * (P.SquaredSum() + Q.SquaredSum());
                    double          objective            = squaredError + regularizationPenaty;

                    #region Linear implementation

                    /*
                     * double e = 0;
                     * foreach (Tuple<int, int, double> element in R_train.Ratings)
                     * {
                     *  int indexOfUser = element.Item1;
                     *  int indexOfItem = element.Item2;
                     *  double rating = element.Item3;
                     *
                     *  e += Math.Pow(rating - P.Row(indexOfUser).DotProduct(Q.Column(indexOfItem)), 2);
                     *
                     *  for (int k = 0; k < factorCount; ++k)
                     *  {
                     *      e += (regularization / 2) * (Math.Pow(P[indexOfUser, k], 2) + Math.Pow(Q[k, indexOfItem], 2));
                     *  }
                     * }
                     */
                    #endregion

                    // Record the current error
                    e_curr = objective;

                    // Stop the learning if the regularized error falls below a certain threshold
                    if (e_prev - e_curr < 0.001)
                    {
                        Console.WriteLine("Improvment less than 0.001, learning stopped.");
                        break;
                    }
                    e_prev = e_curr;

                    Utils.PrintEpoch("Epoch", epoch, maxEpoch, "Objective cost", objective);
                }
            }

            SparseMatrix R_predicted = new SparseMatrix(R_unknown.UserCount, R_unknown.ItemCount);
            foreach (var element in R_unknown.Matrix.EnumerateIndexed(Zeros.AllowSkip))
            {
                int    indexOfUser = element.Item1;
                int    indexOfItem = element.Item2;
                double r_predicted = meanOfGlobal + P.Row(indexOfUser) * Q.Column(indexOfItem);

                if (r_predicted > Config.Ratings.MaxRating)
                {
                    r_predicted = Config.Ratings.MaxRating;
                }
                if (r_predicted < Config.Ratings.MinRating)
                {
                    r_predicted = Config.Ratings.MinRating;
                }

                R_predicted[indexOfUser, indexOfItem] = r_predicted;
            }
            return(new DataMatrix(R_predicted));
            //return new RatingMatrix(R_unknown.PointwiseMultiply(P.Multiply(Q)));
        }