Example #1
        /// <summary>
        /// Merge another matrix into this matrix. The two matrices must not overlap,
        /// i.e., they must not share any nonzero entries.
        /// </summary>
        /// <param name="matrix">The matrix to merge in.</param>
        public void MergeNonOverlap(DataMatrix matrix)
        {
            int count = ratingMatrix.NonZerosCount;

            ratingMatrix += matrix.Matrix;
            Debug.Assert(count + matrix.Matrix.NonZerosCount == ratingMatrix.NonZerosCount);
        }
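A minimal usage sketch (hypothetical values; the DataMatrix indexer and SparseMatrix constructor are the ones used in the examples below). The operands must not share any nonzero cell, or the assertion fires:

        // Merge two disjoint sparse rating sets into one matrix
        DataMatrix a = new DataMatrix(new SparseMatrix(2, 2));
        DataMatrix b = new DataMatrix(new SparseMatrix(2, 2));
        a[0, 0] = 5;          // only cell (0, 0) is set in a
        b[1, 1] = 3;          // only cell (1, 1) is set in b
        a.MergeNonOverlap(b); // a now holds both ratings; NonZerosCount == 2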
Example #2
        private static DataMatrix GetSampleRatingMatrix()
        {
            /*
             * 5  3  0  1
             * 4  0  0  1
             * 1  1  0  5
             * 1  0  0  4
             * 0  1  5  4
             */
            DataMatrix R = new DataMatrix(new SparseMatrix(5, 4));
            R[0, 0] = 5;
            R[0, 1] = 3;
            R[0, 3] = 1;
            R[1, 0] = 4;
            R[1, 3] = 1;
            R[2, 0] = 1;
            R[2, 1] = 1;
            R[2, 3] = 5;
            R[3, 0] = 1;
            R[3, 3] = 4;
            R[4, 1] = 1;
            R[4, 2] = 5;
            R[4, 3] = 4;

            return R;
        }
        // Get the relevant items of each user, i.e., items rated no lower than the criteria
        public static Dictionary<int, List<int>> GetRelevantItemsByUser(DataMatrix R, double criteria)
        {
            int userCount = R.UserCount;
            int itemCount = R.ItemCount;
            Dictionary<int, List<int>> relevantItemsByUser = new Dictionary<int, List<int>>(userCount);

            // Select relevant items for each user
            foreach (Tuple<int, Vector<double>> user in R.Users)
            {
                int userIndex = user.Item1;
                RatingVector userRatings = new RatingVector(user.Item2);
                List<int> relevantItems = new List<int>();

                foreach (Tuple<int, double> element in userRatings.Ratings)
                {
                    int itemIndex = element.Item1;
                    double rating = element.Item2;
                    if (rating >= criteria)
                    {
                        // This is a relevant item
                        relevantItems.Add(itemIndex);
                    }
                }
                relevantItemsByUser[userIndex] = relevantItems;
            }

            return relevantItemsByUser;
        }
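A usage sketch against the sample matrix above; with a criteria of 3.0, only ratings of 3 or more count as relevant:

        DataMatrix R = GetSampleRatingMatrix();
        Dictionary<int, List<int>> relevant = GetRelevantItemsByUser(R, 3.0);
        // relevant[0] == { 0, 1 } : user 0 rated items 0 and 1 with 5 and 3
        // relevant[2] == { 3 }    : user 2's only rating >= 3 is item 3 (rated 5)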
Example #4
 public static void GetCosineOfRows(DataMatrix R, int maxCountOfNeighbors, 
     double strongSimilarityThreshold, out SimilarityData neighborsByObject)
 {
     HashSet<Tuple<int, int>> foo;
     ComputeSimilarities(R.Matrix, SimilarityMetric.CosineRating, maxCountOfNeighbors,
         strongSimilarityThreshold, out neighborsByObject, out foo);
 }
Example #5
 public static void GetCosineOfColumns(DataMatrix R, int maxCountOfNeighbors,
     double strongSimilarityThreshold, out SimilarityData neighborsByObject,
     out HashSet<Tuple<int, int>> strongSimilarityIndicators)
 {
     // Just transpose the matrix so that columns become rows
     ComputeSimilarities(R.Matrix.Transpose(), SimilarityMetric.CosineRating, maxCountOfNeighbors,
         strongSimilarityThreshold, out neighborsByObject, out strongSimilarityIndicators);
 }
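Both cosine routines funnel into the same ComputeSimilarities call; the column variant transposes first, so item-item similarity is just user-user similarity on the transposed matrix. A call sketch with hypothetical parameter values:

 SimilarityData itemNeighbors;
 HashSet<Tuple<int, int>> strongPairs;
 // Keep up to 50 neighbors per item; flag pairs above 0.3 as strong (example values)
 Metric.GetCosineOfColumns(R, 50, 0.3, out itemNeighbors, out strongPairs);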
        /// <summary>
        /// Get the top N items of each user.
        /// The selection is based on the values stored in the matrix,
        /// where an item with a higher value is considered better.
        /// </summary>
        /// <param name="R">The user-by-item matrix, 
        /// each value indicates the quality of the item to a user.</param>
        /// <param name="topN">The number of items to be recommended for each user.</param>
        /// <returns>Each Key is a user and Value is the top N recommended items for that user.</returns>
        public static Dictionary<int, List<int>> GetTopNItemsByUser(DataMatrix R, int topN)
        {
            int userCount = R.UserCount;
            int itemCount = R.ItemCount;
            Dictionary<int, List<int>> topNItemsByUser = new Dictionary<int, List<int>>(userCount);

            // Select top N items for each user
            foreach (Tuple<int, Vector<double>> user in R.Users)
            {
                int indexOfUser = user.Item1;

                // Ratings of this user's items; sorted below
                List<double> ratingsOfItemsSortedByRating = user.Item2.ToList();

                // This is important: some models (e.g., PrefNMF) may produce negative
                // scores, and unless zeros are mapped to negative infinity, unrated
                // items would be ranked above the test items despite never being relevant.
                // (List<T>.ForEach(x => x = ...) only reassigns the lambda parameter
                // and leaves the list unchanged, so an index-based loop is used.)
                for (int i = 0; i < ratingsOfItemsSortedByRating.Count; i++)
                {
                    if (ratingsOfItemsSortedByRating[i] == 0)
                        ratingsOfItemsSortedByRating[i] = double.NegativeInfinity;
                }
                List<int> indexesOfItemsSortedByRating = Enumerable.Range(0, ratingsOfItemsSortedByRating.Count).ToList();

                // Sort by rating
                Sorting.Sort<double, int>(ratingsOfItemsSortedByRating, indexesOfItemsSortedByRating);

                // Make it descending order by rating
                ratingsOfItemsSortedByRating.Reverse();
                indexesOfItemsSortedByRating.Reverse();

                topNItemsByUser[indexOfUser] = indexesOfItemsSortedByRating.GetRange(0, topN);

                // In case the ratings of the top N items need to be stored
                // in the future, implement the following:
                //for (int i = 0; i < topN; ++i)
                //{
                // ratingsOfItemsSortedByRating[i] is the rating of the ith item in topN list
                // indexesOfItemsSortedByRating[i] is the index (in the R) of the ith item in topN list
                //}
            }

            return topNItemsByUser;
        }
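A usage sketch on the sample matrix from Example #2, requesting the top 2 items per user:

        DataMatrix R = GetSampleRatingMatrix();
        Dictionary<int, List<int>> top2 = GetTopNItemsByUser(R, 2);
        // top2[0] == { 0, 1 } : user 0's best values are 5 (item 0) and 3 (item 1).
        // Because zeros become negative infinity before sorting, unrated items
        // are pushed to the end of each ranking.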
Example #7
        public static void GetPearsonOfColumns(DataMatrix R, int maxCountOfNeighbors,
            double strongSimilarityThreshold, out SimilarityData neighborsByObject,
            out HashSet<Tuple<int, int>> strongSimilarityIndicators)
        {
            ComputeSimilarities(R.Matrix.Transpose(), SimilarityMetric.PearsonRating, maxCountOfNeighbors,
                strongSimilarityThreshold, out neighborsByObject, out strongSimilarityIndicators);

            // Debug: verify that all strongly correlated pairs were captured
            for (int i = 0; i < R.ItemCount && i < 100; i++)
            {
                for (int j = 0; j < R.ItemCount && j < 100; j++)
                {
                    if (i == j) continue;
                    double corr_ij = Correlation.Pearson((SparseVector)R.Matrix.Column(i), (SparseVector)R.Matrix.Column(j));
                    if (corr_ij > strongSimilarityThreshold)
                    {
                        Debug.Assert(strongSimilarityIndicators.Contains(new Tuple<int, int>(i, j)));
                        Debug.Assert(strongSimilarityIndicators.Contains(new Tuple<int, int>(j, i)));
                    }
                }
            }
        }
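For reference, the MathNet.Numerics routine used in the Debug block is symmetric in its arguments, which is why both (i, j) and (j, i) must appear in the indicator set. A standalone check with hypothetical vectors:

        using MathNet.Numerics.Statistics;

        double[] x = { 5, 4, 1, 1, 0 };
        double[] y = { 3, 0, 1, 0, 1 };
        double r_xy = Correlation.Pearson(x, y);
        double r_yx = Correlation.Pearson(y, x);
        // r_xy == r_yx, so the indicator set must contain both orderings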
Example #8
        public static DataMatrix PredictRatings(PrefRelations PR_train, DataMatrix R_unknown,
           int maxEpoch, double learnRate, double regularizationOfUser, double regularizationOfItem, int factorCount)
        {
            // Latent features
            List<Vector<double>> P;
            List<Vector<double>> Q;

            LearnLatentFeatures(PR_train, maxEpoch, learnRate, regularizationOfUser, regularizationOfItem, factorCount, out P, out Q);

            List<Tuple<int, int, double>> R_predicted_cache = new List<Tuple<int, int, double>>();
            foreach(var data in R_unknown.Matrix.EnumerateIndexed(Zeros.AllowSkip))
            {
                int indexOfUser = data.Item1;
                int indexOfItem = data.Item2;
                R_predicted_cache.Add(new Tuple<int, int, double>(indexOfUser, indexOfItem, P[indexOfUser].DotProduct(Q[indexOfItem])));
            }

            DataMatrix R_predicted = new DataMatrix(SparseMatrix.OfIndexed(R_unknown.UserCount,R_unknown.ItemCount,R_predicted_cache));
                //new DataMatrix(R_unknown.Matrix.PointwiseMultiply(P.Multiply(Q)));
            // TODO: should the result be normalized into [0, 1]? Note that zero
            // entries would then also be mapped to 0.5 by the inverse logit.
            //R_predicted.Matrix.MapInplace(x => RecSys.Core.SpecialFunctions.InverseLogit(x), Zeros.AllowSkip);
            return R_predicted;
        }
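The prediction step reduces to a dot product of the learned latent vectors. A distilled sketch of that rule (dimensions assumed; P and Q as produced by LearnLatentFeatures above):

        // \hat{r}_{ui} = p_u . q_i
        double PredictOne(List<Vector<double>> P, List<Vector<double>> Q, int u, int i)
        {
            return P[u].DotProduct(Q[i]);
        }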
Example #9
        public string GetReadyForOrdinal(bool saveLoadedData = true)
        {
            if (!ReadyForNumerical) { GetReadyForNumerical(); }
            if (ReadyForOrdinal) { return "Is ready."; }

            StringBuilder log = new StringBuilder();
            Utils.StartTimer();
            log.AppendLine(Utils.PrintHeading("Prepare preference relation data"));

            Console.WriteLine("Converting R_train into PR_train");
            log.AppendLine("Converting R_train into PR_train");
            PR_train = PrefRelations.CreateDiscrete(R_train);

            //Console.WriteLine("Converting R_test into PR_test");
            //log.AppendLine("Converting R_test into PR_test");
            //PR_test = PrefRelations.CreateDiscrete(R_test);

            log.AppendLine(Utils.StopTimer());

            #region Prepare similarity data
            if (File.Exists(GetDataFileName("USP"))
                && File.Exists(GetDataFileName("ISP"))
                && File.Exists(GetDataFileName("SSIIP")))
            {

                Utils.StartTimer();
                Utils.PrintHeading("Load user, item, indicators variables (Pref based)");
                UserSimilaritiesOfPref = Utils.IO<SimilarityData>.LoadObject(GetDataFileName("USP"));
                ItemSimilaritiesOfPref = Utils.IO<SimilarityData>.LoadObject(GetDataFileName("ISP"));
                StrongSimilarityIndicatorsByItemPref = Utils.IO<HashSet<Tuple<int, int>>>.LoadObject(GetDataFileName("SSIIP"));
                Utils.StopTimer();
            }
            else
            {
                Utils.StartTimer();
                Utils.PrintHeading("Compute user-user similarities (Pref based)");
                Metric.GetCosineOfPrefRelations(PR_train, MaxCountOfNeighbors, 
                    StrongSimilarityThreshold, out UserSimilaritiesOfPref);
                Utils.StopTimer();

                // For the moment, user-wise preferences are used to compute
                // item-item similarities; this is not the same as user-user
                // preference similarities.
                Utils.StartTimer();
                Utils.PrintHeading("Compute item-item similarities (Pref based)");
                DataMatrix PR_userwise_preferences = new DataMatrix(PR_train.GetPositionMatrix());
                Metric.GetPearsonOfColumns(PR_userwise_preferences, MaxCountOfNeighbors, StrongSimilarityThreshold,
                    out ItemSimilaritiesOfPref, out StrongSimilarityIndicatorsByItemPref);
                Utils.StopTimer();

                if (saveLoadedData)
                {
                    Utils.IO<SimilarityData>.SaveObject(UserSimilaritiesOfPref, GetDataFileName("USP"));
                    Utils.IO<SimilarityData>.SaveObject(ItemSimilaritiesOfPref, GetDataFileName("ISP"));
                    Utils.IO<HashSet<Tuple<int,int>>>
                        .SaveObject(StrongSimilarityIndicatorsByItemPref, GetDataFileName("SSIIP"));
                }

            }
            #endregion

            

            ReadyForOrdinal = true;

            return log.ToString();
        }
Example #10
 public static double Evaluate(DataMatrix correctMatrix, DataMatrix predictedMatrix)
 {
     Debug.Assert(correctMatrix.NonZerosCount == predictedMatrix.NonZerosCount);
     double numerator = (predictedMatrix.Matrix - correctMatrix.Matrix).FrobeniusNorm();
     return numerator / Math.Sqrt(correctMatrix.NonZerosCount);
 }
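Because both matrices share the same nonzero pattern (asserted above), the Frobenius norm of their difference divided by sqrt(n) is exactly the root mean squared error over the n rated cells. A worked check with hypothetical 1x2 matrices:

 // Differences are (2, 3), so RMSE = sqrt((4 + 9) / 2) = sqrt(6.5) ~= 2.5495
 DataMatrix correct = new DataMatrix(new SparseMatrix(1, 2));
 DataMatrix predicted = new DataMatrix(new SparseMatrix(1, 2));
 correct[0, 0] = 3; correct[0, 1] = 4;
 predicted[0, 0] = 1; predicted[0, 1] = 1;
 double rmse = Evaluate(correct, predicted); // ~2.5495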
Example #11
        // TODO: Scalar preference relations based on Bradley-Terry model
        public static PrefRelations CreateScalar(DataMatrix R)
        {
            int userCount = R.UserCount;
            int itemCount = R.ItemCount;
            PrefRelations PR = new PrefRelations(itemCount);

            // Create a preference matrix for each user
            Object lockMe = new Object();
            Parallel.ForEach(R.Users, user =>
            {
                int userIndex = user.Item1;
                RatingVector userRatings = new RatingVector(user.Item2);

                Utils.PrintEpoch("Doing user/total", userIndex, userCount);

                // The diagonal refers to the i-i item pairs
                SparseMatrix userPreferences = new SparseMatrix(itemCount);

                // The diagonal is left empty!
                //SparseMatrix.OfMatrix(Matrix.Build.SparseDiagonal(itemCount, Config.Preferences.EquallyPreferred));

                // TODO: Use Vector.Map2 to replace the following two foreach loops

                // Here we need to compare each pair of items rated by this user
                foreach (Tuple<int, double> left in userRatings.Ratings)
                {
                    int leftItemIndex = left.Item1;
                    double leftItemRating = left.Item2;

                    foreach (Tuple<int, double> right in userRatings.Ratings)
                    {
                        int rightItemIndex = right.Item1;

                        // TODO: We could compute only the lower triangular
                        // and derive the upper triangular from it.
                        // For now, both are computed directly.
                        double rightItemRating = right.Item2;

                        Debug.Assert(rightItemRating != 0 && leftItemRating != 0);

                        // Skip the diagonal
                        if (leftItemIndex == rightItemIndex) { continue; }

                        // Map the rating difference into the (0, 1) range; an alternative
                        // would be (double)leftItemRating / (leftItemRating + rightItemRating)
                        userPreferences[leftItemIndex, rightItemIndex] = 0.1 * (leftItemRating - rightItemRating + 5);
                    }
                }

                // The upper triangular mirrors the lower triangular, so the count is even
                Debug.Assert((userPreferences.NonZerosCount).IsEven());
                double debug1 = (Math.Pow(((SparseVector)R.GetRow(userIndex)).NonZerosCount, 2)
                    - ((SparseVector)R.GetRow(userIndex)).NonZerosCount);
                double debug2 = userPreferences.NonZerosCount;
                Debug.Assert(debug1 == debug2);

                lock (lockMe)
                {
                    // Copy similarity values from lower triangular to upper triangular
                    //pr_uid = DenseMatrix.OfMatrix(pr_uid + pr_uid.Transpose() - DenseMatrix.CreateIdentity(pr_uid.RowCount));
                    PR[userIndex] = userPreferences;
                }
            });



            return PR;
        }
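The encoding 0.1 * (left - right + 5) assumes ratings on a 1..5 scale: differences in [-4, 4] map to preferences in [0.1, 0.9], with ties at 0.5. Keeping the range strictly positive matters because a stored 0 would be indistinguishable from an empty sparse cell. A quick check of the mapping:

        double Encode(double left, double right) => 0.1 * (left - right + 5);
        Console.WriteLine(Encode(5, 1)); // 0.9, strongly preferred
        Console.WriteLine(Encode(3, 3)); // 0.5, equally preferred (distinct items)
        Console.WriteLine(Encode(1, 5)); // 0.1, strongly less preferred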
Example #12
        public void PredictRatings(DataMatrix R_train, DataMatrix R_unknown, 
            HashSet<Tuple<int,int>> strongSimilarityIndicators, 
            Dictionary<Tuple<int, int>, List<double>> OMFDistributions, 
            double regularization, double learnRate, int maxEpoch, int ratingLevels, 
            out DataMatrix R_predicted_expectations, out DataMatrix R_predicted_mostlikely)
        {
            /************************************************************
             *   Parameterization and Initialization
            ************************************************************/
            #region Parameterization and Initialization
            int userCount = R_train.UserCount;
            int itemCount = R_train.ItemCount;
            meanByUser = R_train.GetUserMeans(); // Mean value of each user
            meanByItem = R_train.GetItemMeans(); // Mean value of each item
            this.R_train = R_train;
            this.OMFDistributions = OMFDistributions;
            R_predicted_expectations = new DataMatrix(R_unknown.UserCount, R_unknown.ItemCount);
            R_predicted_mostlikely = new DataMatrix(R_unknown.UserCount, R_unknown.ItemCount);


            // Initialize the weights
            this.strongSimilarityIndicators = strongSimilarityIndicators;
            featureWeightByItemItem = new Dictionary<Tuple<int, int>, double>(strongSimilarityIndicators.Count);

            // Initialize all strong item-item features
            Random rnd = new Random(Config.Seed);
            
            foreach(var strongSimilarityPair in strongSimilarityIndicators)
            {
                double randomWeight = rnd.NextDouble() * 0.01;
                featureWeightByItemItem[strongSimilarityPair] = randomWeight;
            }

            // Cache which items have been rated by each user;
            // this is reused in every feature update
            Dictionary<int, List<int>> itemsByUser = R_train.GetItemsByUser();

            // TODO: we actually store more features than needed, because some item pairs may not be co-rated by any user
            Utils.PrintValue("# of item-item features", (featureWeightByItemItem.Count / 2).ToString());

            #endregion

            /************************************************************
             *   Learn weights from training data R_train
            ************************************************************/
            #region Learn weights from training data R_train
            double likelihood_prev = -double.MaxValue;
            for (int epoch = 0; epoch < maxEpoch; epoch++)
            {
                /************************************************************
                 *   Apply Eq. 23 and 24
                ************************************************************/
                #region Apply Eq. 23 and 24
                // Unlike NMF, which uses each individual rating as a training input,
                // here the set of ratings by each user is the input for each pass
                foreach (var user in R_train.Users)
                {
                    int indexOfUser = user.Item1;
                    Vector<double> ratingsOfUser = user.Item2;
                    Debug.Assert(ratingsOfUser.Storage.IsDense == false, "The user ratings should be stored in a sparse vector.");

                    List<int> itemsOfUser = itemsByUser[indexOfUser];   // Cache the items rated by this user

                    // Select one rating r_ui from the user's ratings R_u and combine it
                    // with each other rating r_uj in R_u, refining the weight associated
                    // with each i-j item pair co-rated by this user
                    foreach (var item_i in ratingsOfUser.EnumerateIndexed(Zeros.AllowSkip))
                    {
                        int indexOfItem_i = item_i.Item1;
                        int r_ui = (int)R_train[indexOfUser, indexOfItem_i];    // The R_train should be all integers
                        double meanOfItem_i = meanByItem[indexOfItem_i];

                        // Find out the neighbors of item_i, i.e., "\vec{r}_u\r_ui" in Eq. 21
                        List<int> neighborsOfItem_i = new List<int>(itemsOfUser.Count);

                        //neighborsOfItem_i.Remove(indexOfItem_i);    // It is not a neighbor of itself

                        // Keep strong neighbors
                        foreach (int indexOfNeighbor in itemsOfUser)
                        {
                            if (strongSimilarityIndicators.Contains(new Tuple<int,int>(indexOfItem_i, indexOfNeighbor))
                                && indexOfNeighbor != indexOfItem_i)
                            {
                                neighborsOfItem_i.Add(indexOfNeighbor);
                            }
                            //else if(indexOfItem_i!=indexOfNeighbor)
                            //{
                            //    double pearson = Correlation.Pearson((SparseVector)R_train.Matrix.Column(indexOfItem_i),
                            //        (SparseVector)R_train.Matrix.Column(indexOfNeighbor));
                            //    Debug.Assert(pearson < 0.2);
                            //}
                        }

                        // Partition function Z_ui
                        double Z_ui = 0;
                        List<double> localLikelihoods = new List<double>(ratingLevels);

                        // These loops run sequentially, so no locking is needed here
                        for (int targetRating = 1; targetRating <= ratingLevels; targetRating++)
                        {
                            Z_ui += OMFDistributions[new Tuple<int, int>(indexOfUser, indexOfItem_i)][targetRating - 1]
                                * ComputePotential(targetRating, indexOfUser, indexOfItem_i, neighborsOfItem_i);
                        }

                        // The local likelihood is recomputed for every i-j pair rather than
                        // once for i because the weights keep changing.
                        // TODO: the changed weights do not appear to affect this local
                        // likelihood, so it could be moved outside the j loop once the
                        // algorithm is stable; experiments so far suggest this is correct.
                        for (int targetRating = 1; targetRating <= ratingLevels; targetRating++)
                        {
                            double localLikelihoodOfTargetRating = ComputeLocalLikelihood(targetRating, indexOfUser,
                                indexOfItem_i, neighborsOfItem_i, Z_ui);
                            localLikelihoods.Add(localLikelihoodOfTargetRating);
                        }

                        // For each neighbor item with strong correlation to item_i,
                        // update the weight w_ij
                        foreach (int indexOfItem_j in neighborsOfItem_i)
                        {
                            // i-j and j-i correspond to the same feature,
                            // so train only when i < j to avoid double updates
                            if (indexOfItem_i > indexOfItem_j) { continue; }

                            // Weak features (similarity below the threshold) have already
                            // been filtered out before this loop, so no check is needed here

                            // Compute gradient Eq. 24
                            #region Compute gradients
                            double r_uj = R_train[indexOfUser, indexOfItem_j];
                            double meanOfItem_j = meanByItem[indexOfItem_j];

                            // Compute the first term in Eq.24
                            double gradientFirstTerm = ComputeCorrelationFeature(r_ui, meanOfItem_i, r_uj, meanOfItem_j);

                            // Compute the second term in Eq. 24
                            double gradientSecondTerm = 0.0;
                            for (int targetRating = 1; targetRating <= ratingLevels; targetRating++)
                            {
                                // Use the local likelihoods precomputed above for this item i;
                                // see the TODO there about moving them out of the j loop

                                double localLikelihoodOfTargetRating = localLikelihoods[targetRating - 1];
                                double correlationFeature = ComputeCorrelationFeature(targetRating, meanOfItem_i, r_uj, meanOfItem_j);
                                gradientSecondTerm += localLikelihoodOfTargetRating * correlationFeature;
                            }

                            // Merge all terms
                            double gradient = gradientFirstTerm - gradientSecondTerm;

                            #endregion

                            #region Update weights

                            // Add the regularization penalty (it belongs in Eq. 23/24)
                            double weight = featureWeightByItemItem[new Tuple<int, int>(indexOfItem_i, indexOfItem_j)];
                            gradient -= regularization * weight;
                            double step = learnRate * gradient; // Apply the learning rate

                            // Update the weight with gradient
                            featureWeightByItemItem[new Tuple<int, int>(indexOfItem_i, indexOfItem_j)] += step;

                            // The weights are mirrored
                            featureWeightByItemItem[new Tuple<int, int>(indexOfItem_j, indexOfItem_i)] += step;

                            #endregion
                        }
                    }
                }
                #endregion

                /************************************************************
                 *   Compute the regularized sum of log local likelihoods, Eq. 20
                 *   see if it converges
                ************************************************************/
                #region Compute sum of regularized log likelihood see if it converges

                if (epoch == 0 || epoch == maxEpoch - 1 || epoch % (int)Math.Ceiling(maxEpoch * 0.1) == 4)
                {
                    double likelihood_curr = 0;
                    // We compute user by user so that Z_ui can be reused
                    double sumOfLogLL = 0.0;   // sum of log local likelihoods, first term in Eq. 20
                    foreach (var user in R_train.Users)
                    {
                        int indexOfUser = user.Item1;
                        Vector<double> ratingsOfUser = user.Item2;
                        Debug.Assert(ratingsOfUser.Storage.IsDense == false, "The user ratings should be stored in a sparse vector.");

                        List<int> itemsOfUser = itemsByUser[indexOfUser];   // Cache the items rated by this user
                        double logLLOfUser = 0.0;   // The sum of all Eq. 21 of the current user

                        foreach (var item_i in ratingsOfUser.EnumerateIndexed(Zeros.AllowSkip))
                        {
                            int indexOfItem_i = item_i.Item1;
                            int r_ui = (int)R_train[indexOfUser, indexOfItem_i];    // The R_train should be all integers
                            double meanOfItem_i = meanByItem[indexOfItem_i];

                            // Find out the neighbors of item_i, i.e., "\vec{r}_u\r_ui" in Eq. 21
                            //List<int> neighborsOfItem_i = new List<int>(itemsOfUser);
                            List<int> neighborsOfItem_i = new List<int>(itemsOfUser.Count);

                            //neighborsOfItem_i.Remove(indexOfItem_i);    // It is not a neighbor of itself

                            // Keep only the strong neighbors
                            foreach (int indexOfNeighbor in itemsOfUser)
                            {
                                if (strongSimilarityIndicators.Contains(new Tuple<int, int>(indexOfItem_i, indexOfNeighbor))
                                    && indexOfNeighbor != indexOfItem_i)
                                {
                                    neighborsOfItem_i.Add(indexOfNeighbor);
                                }
                            }

                            // Partition function Z_ui
                            double Z_ui = 0;
                            for (int targetRating = 1; targetRating <= ratingLevels; targetRating++)
                            {
                                Z_ui += OMFDistributions[new Tuple<int, int>(indexOfUser, indexOfItem_i)][targetRating - 1]
                                    * ComputePotential(targetRating, indexOfUser, indexOfItem_i, neighborsOfItem_i);
                            }

                            // Eq. 21 for the current item i, that is for r_ui
                            double localLikelihoodOfRating_ui = ComputeLocalLikelihood(r_ui, indexOfUser, indexOfItem_i, neighborsOfItem_i, Z_ui);
                            logLLOfUser += Math.Log(localLikelihoodOfRating_ui);
                        }
                        sumOfLogLL += logLLOfUser;
                    }

                    // Eq. 20
                    double regularizedSumOfLogLL = sumOfLogLL - regularization 
                        * featureWeightByItemItem.Sum(x => x.Value * x.Value);// featureWeightByItemItem.SquaredSum();
                    likelihood_curr = regularizedSumOfLogLL;
                    Utils.PrintEpoch("Epoch", epoch, maxEpoch, "Reg sum of log LL", regularizedSumOfLogLL.ToString("0.000"));

                    double improvement = Math.Abs(likelihood_prev) - Math.Abs(likelihood_curr);
                    if (improvement < 0.001)
                    {
                        Console.WriteLine("Improvement less than 0.001, learning stopped.");
                        break;
                    }

                    likelihood_prev = likelihood_curr;
                }


 
                #endregion
            }
            #endregion

            /************************************************************
             *   Make predictions
            ************************************************************/
            #region Make predictions

            foreach(var user in R_unknown.Users)
            {
                int indexOfUser = user.Item1;
                Vector<double> unknownRatingsOfUser = user.Item2;
                List<int> itemsOfUser = itemsByUser[indexOfUser];

                foreach(var unknownRating in unknownRatingsOfUser.EnumerateIndexed(Zeros.AllowSkip))
                {
                    int indexOfItem = unknownRating.Item1;

                    List<int> neighborsOfItem = new List<int>(itemsOfUser.Count);
                    //neighborsOfItem.Remove(indexOfItem);    // It is not a neighbor of itself
                    // Keep only the strong neighbors
                    foreach (int indexOfNeighbor in itemsOfUser)
                    {
                        if (strongSimilarityIndicators.Contains(new Tuple<int, int>(indexOfItem, indexOfNeighbor))
                            && indexOfNeighbor != indexOfItem)
                        {
                            neighborsOfItem.Add(indexOfNeighbor);
                        }
                    }

                    // Partition function Z
                    double Z_ui = 0;
                    for (int targetRating = 1; targetRating <= ratingLevels; targetRating++)
                    {
                        Z_ui += OMFDistributions[new Tuple<int, int>(indexOfUser, indexOfItem)][targetRating - 1] * ComputePotential(targetRating, indexOfUser, indexOfItem, neighborsOfItem);
                    }

                    double sumOfLikelihood = 0.0;
                    double currentMaxLikelihood = 0.0;
                    double mostlikelyRating = 0.0;
                    double expectationRating = 0.0;
                    for (int targetRating = 1; targetRating <= ratingLevels; targetRating++)
                    {
                        double likelihoodOfTargetRating = ComputeLocalLikelihood(targetRating, indexOfUser, indexOfItem, neighborsOfItem, Z_ui);

                        // Compute the most likely rating for MAE
                        if (likelihoodOfTargetRating > currentMaxLikelihood)
                        {
                            mostlikelyRating = targetRating;
                            currentMaxLikelihood = likelihoodOfTargetRating;
                        }

                        // Compute expectation for RMSE
                        expectationRating += targetRating * likelihoodOfTargetRating;

                        sumOfLikelihood += likelihoodOfTargetRating;
                    }

                    // The likelihoods should sum to 1, up to numerical precision
                    Debug.Assert(Math.Abs(sumOfLikelihood - 1.0) < 0.0001);

                    R_predicted_expectations[indexOfUser, indexOfItem] = expectationRating;
                    R_predicted_mostlikely[indexOfUser, indexOfItem] = mostlikelyRating;
                }
            }

            #endregion

        }
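The per-rating probability used throughout this method is a normalized product of the OMF prior and the potential function. A schematic of that computation, with a hypothetical delegate standing in for ComputePotential:

        // prior[r - 1] plays the role of OMFDistributions[(u, i)][r - 1]
        double Likelihood(double[] prior, Func<int, double> potential, int r, int ratingLevels)
        {
            double Z = 0; // partition function Z_ui
            for (int level = 1; level <= ratingLevels; level++)
            {
                Z += prior[level - 1] * potential(level);
            }
            return prior[r - 1] * potential(r) / Z;
        }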
Example #13
        public string RunNMFbasedOMF(int maxEpoch, double learnRate, double regularization, int factorCount,
            List<double> quantizer, int topN = 0)
        {
            if (!ReadyForNumerical) { GetReadyForNumerical(); }
            StringBuilder log = new StringBuilder();
            log.AppendLine(Utils.PrintHeading("NMF based OMF"));

            // NMF Prediction
            // Get ratings from scorer, for both train and test
            // R_all contains the indexes of all ratings, both train and test
            DataMatrix R_all = new DataMatrix(R_unknown.UserCount, R_unknown.ItemCount);
            R_all.MergeNonOverlap(R_unknown);
            R_all.MergeNonOverlap(R_train.IndexesOfNonZeroElements());
            Utils.StartTimer();
            DataMatrix R_predictedByNMF = NMF.PredictRatings(R_train, R_all, maxEpoch,
                learnRate, regularization, factorCount);
            log.AppendLine(Utils.StopTimer());

            // OMF Prediction
            log.AppendLine(Utils.PrintHeading("Ordinal Matrix Factorization with NMF as scorer"));
            Utils.StartTimer();
            Dictionary<Tuple<int, int>, List<double>> OMFDistributionByUserItem;
            DataMatrix R_predicted;
            log.AppendLine(OMF.PredictRatings(R_train.Matrix, R_unknown.Matrix, R_predictedByNMF.Matrix,
                quantizer, out R_predicted, out OMFDistributionByUserItem));
            log.AppendLine(Utils.StopTimer());

            // Numerical Evaluation
            log.AppendLine(Utils.PrintValue("RMSE", RMSE.Evaluate(R_test, R_predicted).ToString("0.0000")));
            log.AppendLine(Utils.PrintValue("MAE", MAE.Evaluate(R_test, R_predicted).ToString("0.0000")));

            // TopN Evaluation
            if (topN != 0)
            {
                var topNItemsByUser = ItemRecommendationCore.GetTopNItemsByUser(R_predicted, topN);
                for (int n = 1; n <= topN; n++)
                {
                    log.AppendLine(Utils.PrintValue("NCDG@" + n, NCDG.Evaluate(RelevantItemsByUser, topNItemsByUser, n).ToString("0.0000")));
                }
                for (int n = 1; n <= topN; n++)
                {
                    log.AppendLine(Utils.PrintValue("MAP@" + n, MAP.Evaluate(RelevantItemsByUser, topNItemsByUser, n).ToString("0.0000")));
                }
            }

            // Save OMFDistribution to file
            if (!File.Exists(GetDataFileName("RatingOMF_")))
            {
                Utils.IO<Dictionary<Tuple<int, int>, List<double>>>.SaveObject(OMFDistributionByUserItem, GetDataFileName("RatingOMF_"));
            }

            return log.ToString();
        }
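A hypothetical invocation (the instance name and hyperparameter values are illustrative only); five integer quantization levels match a 1..5 rating scale:

        var quantizer = new List<double> { 1, 2, 3, 4, 5 };
        string report = experiment.RunNMFbasedOMF(maxEpoch: 100, learnRate: 0.01,
            regularization: 0.05, factorCount: 10, quantizer: quantizer, topN: 10);
        Console.WriteLine(report);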
Example #14
        public string RunPrefNMFbasedOMF(int maxEpoch, double learnRate, double regularizationOfUser,
            double regularizationOfItem, int factorCount, List<double> quantizer, int topN)
        {
            if (!ReadyForOrdinal) { GetReadyForOrdinal(); }
            StringBuilder log = new StringBuilder();
            log.AppendLine(Utils.PrintHeading("PrefNMF based OMF"));

            // =============PrefNMF prediction on Train+Unknown============
            // Get ratings from the scorer, for both train and test.
            // Building a full R_all matrix holding the indexes of all ratings (as in
            // RunNMFbasedOMF) proved far too slow here, so a lighter dictionary-based
            // structure is used instead.
            Dictionary<int, List<int>> ItemsByUser_train = R_train.GetItemsByUser();
            Dictionary<int, List<int>> ItemsByUser_unknown = R_unknown.GetItemsByUser();

            // Note: the Dictionary copy constructor is shallow, so each item list is
            // cloned before being extended; otherwise the lists inside
            // ItemsByUser_train would be mutated as well
            Dictionary<int, List<int>> PR_unknown = new Dictionary<int, List<int>>(ItemsByUser_train.Count);
            foreach (var pair in ItemsByUser_train)
            {
                List<int> itemsOfUser = new List<int>(pair.Value);
                itemsOfUser.AddRange(ItemsByUser_unknown[pair.Key]);
                PR_unknown[pair.Key] = itemsOfUser;
            }

            Utils.StartTimer();
            SparseMatrix PR_predicted = PrefNMF.PredictPrefRelations(PR_train, PR_unknown,
                maxEpoch, learnRate, regularizationOfUser, regularizationOfItem, factorCount, quantizer);

            // Both the predicted and the train preference matrices need to be
            // quantized, otherwise OMF won't accept them.
            //PR_predicted.quantization(0, 1.0,
            //    new List<double> { Config.Preferences.LessPreferred,
            //        Config.Preferences.EquallyPreferred, Config.Preferences.Preferred });
            DataMatrix R_predictedByPrefNMF = new DataMatrix(PR_predicted);
            // Alternative: new DataMatrix(PR_predicted.GetPositionMatrix());

            // PR_train itself is already in quantized form!
            //PR_train.quantization(0, 1.0, new List<double> { Config.Preferences.LessPreferred, Config.Preferences.EquallyPreferred, Config.Preferences.Preferred });
            DataMatrix R_train_positions = new DataMatrix(PR_train.GetPositionMatrix());
            R_train_positions.Quantization(quantizer[0], quantizer[quantizer.Count - 1] - quantizer[0], quantizer);
            log.AppendLine(Utils.StopTimer());

            // =============OMF prediction on Train+Unknown============
            log.AppendLine(Utils.PrintHeading("Ordinal Matrix Factorization with PrefNMF as scorer"));
            Utils.StartTimer();
            Dictionary<Tuple<int, int>, List<double>> OMFDistributionByUserItem;
            DataMatrix R_predicted;
            log.AppendLine(OMF.PredictRatings(R_train_positions.Matrix, R_unknown.Matrix, R_predictedByPrefNMF.Matrix,
                quantizer, out R_predicted, out OMFDistributionByUserItem));
            log.AppendLine(Utils.StopTimer());

            // TopN Evaluation
            var topNItemsByUser = ItemRecommendationCore.GetTopNItemsByUser(R_predicted, topN);
            for (int n = 1; n <= topN; n++)
            {
                log.AppendLine(Utils.PrintValue("NCDG@" + n, NCDG.Evaluate(RelevantItemsByUser, topNItemsByUser, n).ToString("0.0000")));
            }
            for (int n = 1; n <= topN; n++)
            {
                log.AppendLine(Utils.PrintValue("MAP@" + n, MAP.Evaluate(RelevantItemsByUser, topNItemsByUser, n).ToString("0.0000")));
            }

            // Save OMFDistribution to file
            if (!File.Exists(GetDataFileName("PrefOMF_")))
            {
                Utils.IO<Dictionary<Tuple<int, int>, List<double>>>.SaveObject(OMFDistributionByUserItem, GetDataFileName("PrefOMF_"));
            }

            return log.ToString();
        }
Example #15
        public static DataMatrix PredictRatings(DataMatrix R_train, DataMatrix R_unknown,
            int maxEpoch, double learnRate, double regularization, int factorCount)
        {
            int userCount = R_train.UserCount;
            int itemCount = R_train.ItemCount;
            int ratingCount = R_train.NonZerosCount;
            double meanOfGlobal = R_train.GetGlobalMean();
            DataMatrix R_train_unknown = R_train.IndexesOfNonZeroElements();  // For testing convergence

            // User latent vectors with default seed
            Matrix<double> P = Utils.CreateRandomMatrixFromNormal(userCount, factorCount, 0, 0.1, Config.Seed);
           // Matrix<double> P = Utils.CreateRandomMatrixFromUniform(userCount, factorCount, 0, 0.1, Config.Seed);
            // Item latent vectors with a different seed
            Matrix<double> Q = Utils.CreateRandomMatrixFromNormal(factorCount, itemCount, 0, 0.1, Config.Seed + 1);
            //Matrix<double> Q = Utils.CreateRandomMatrixFromUniform(factorCount, itemCount, 0, 0.1, Config.Seed + 1);

            // SGD
            double e_prev = double.MaxValue;
            for (int epoch = 0; epoch < maxEpoch; ++epoch)
            {
                foreach (Tuple<int, int, double> element in R_train.Ratings)
                {
                    int indexOfUser = element.Item1;
                    int indexOfItem = element.Item2;
                    double rating = element.Item3;

                    double e_ij = rating - (meanOfGlobal + P.Row(indexOfUser).DotProduct(Q.Column(indexOfItem)));
                    
                    // Update feature vectors
                    Vector<double> P_u = P.Row(indexOfUser);
                    Vector<double> Q_i = Q.Column(indexOfItem);

                    Vector<double> P_u_updated = P_u + (Q_i.Multiply(e_ij) - P_u.Multiply(regularization)).Multiply(learnRate);
                    P.SetRow(indexOfUser, P_u_updated);

                    Vector<double> Q_i_updated = Q_i + (P_u.Multiply(e_ij) - Q_i.Multiply(regularization)).Multiply(learnRate);
                    Q.SetColumn(indexOfItem, Q_i_updated);

                    #region Update feature vectors loop version
                    /*
                    // Update feature vectors
                    for (int k = 0; k < factorCount; ++k)
                    {
                        double factorOfUser = P[indexOfUser, k];
                        double factorOfItem = Q[k, indexOfItem];

                        P[indexOfUser, k] += learnRate * (e_ij * factorOfItem - regularization * factorOfUser);
                        Q[k, indexOfItem] += learnRate * (e_ij * factorOfUser - regularization * factorOfItem);
                    }
                    */
                    #endregion
                }

                // Display the current regularized error to see if it converges

                double e_curr = 0;
                if (epoch == 0 || epoch == maxEpoch - 1 || epoch % (int)Math.Ceiling(maxEpoch * 0.1) == 4)
                {
                    Matrix<double> predictedMatrix = R_train_unknown.PointwiseMultiply(P.Multiply(Q));
                    SparseMatrix correctMatrix = R_train.Matrix;
                    double squaredError = (correctMatrix - predictedMatrix).SquaredSum();
                    double regularizationPenalty = regularization * (P.SquaredSum() + Q.SquaredSum());
                    double objective = squaredError + regularizationPenalty;

                    #region Linear implementation
                    /*
                    double e = 0;
                    foreach (Tuple<int, int, double> element in R_train.Ratings)
                    {
                        int indexOfUser = element.Item1;
                        int indexOfItem = element.Item2;
                        double rating = element.Item3;

                        e += Math.Pow(rating - P.Row(indexOfUser).DotProduct(Q.Column(indexOfItem)), 2);

                        for (int k = 0; k < factorCount; ++k)
                        {
                            e += (regularization / 2) * (Math.Pow(P[indexOfUser, k], 2) + Math.Pow(Q[k, indexOfItem], 2));
                        }
                    }
                    */
                    #endregion

                    // Record the current error
                    e_curr = objective;

                    // Stop learning when the improvement in regularized error falls below a threshold
                    if (e_prev - e_curr < 0.001)
                    {
                        Console.WriteLine("Improvement less than 0.001, learning stopped.");
                        break;
                    }
                    e_prev = e_curr;

                    Utils.PrintEpoch("Epoch", epoch, maxEpoch, "Objective cost", objective);
                }

            }

            SparseMatrix R_predicted = new SparseMatrix(R_unknown.UserCount, R_unknown.ItemCount);
            foreach(var element in R_unknown.Matrix.EnumerateIndexed(Zeros.AllowSkip))
            {
                int indexOfUser = element.Item1;
                int indexOfItem = element.Item2;
                double r_predicted = meanOfGlobal + P.Row(indexOfUser) * Q.Column(indexOfItem);

                if (r_predicted > Config.Ratings.MaxRating) r_predicted = Config.Ratings.MaxRating;
                if (r_predicted < Config.Ratings.MinRating) r_predicted = Config.Ratings.MinRating;

                R_predicted[indexOfUser, indexOfItem] = r_predicted;
            }
            return new DataMatrix(R_predicted);
            //return new RatingMatrix(R_unknown.PointwiseMultiply(P.Multiply(Q)));
        }
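A hypothetical call with illustrative hyperparameters; R_train and R_unknown are matrices of the kind built in the other examples:

        DataMatrix R_predicted = NMF.PredictRatings(R_train, R_unknown,
            maxEpoch: 100, learnRate: 0.01, regularization: 0.05, factorCount: 10);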
Example #16
        public string GetReadyForNumerical(bool saveLoadedData = true)
        {
            if (ReadyForNumerical) { return "Is ready."; }

            StringBuilder log = new StringBuilder();
            Utils.StartTimer();

            log.AppendLine(Utils.PrintHeading("Create R_train/R_test sets from " + DataSetFile));
            Utils.LoadMovieLensSplitByCount(DataSetFile, out R_train,
                out R_test, MinCountOfRatings, MaxCountOfRatings, CountOfRatingsForTrain, ShuffleData, Seed);

            Console.WriteLine(R_train.DatasetBrief("Train set"));
            Console.WriteLine(R_test.DatasetBrief("Test set"));
            log.AppendLine(R_train.DatasetBrief("Train set"));
            log.AppendLine(R_test.DatasetBrief("Test set"));

            R_unknown = R_test.IndexesOfNonZeroElements();

            log.AppendLine(Utils.PrintValue("Relevant item criteria", RelevantItemCriteria.ToString("0.0")));
            RelevantItemsByUser = ItemRecommendationCore.GetRelevantItemsByUser(R_test, RelevantItemCriteria);
            log.AppendLine(Utils.PrintValue("Mean # of relevant items per user",
                RelevantItemsByUser.Average(k => k.Value.Count).ToString("0")));
            log.AppendLine(Utils.StopTimer());

            #region Prepare similarity data
            if (File.Exists(GetDataFileName("USR"))
                && File.Exists(GetDataFileName("ISR"))
                && File.Exists(GetDataFileName("SSIIR")))
            {
                Utils.StartTimer();
                Utils.PrintHeading("Load user-user similarities (rating based)");
                UserSimilaritiesOfRating = Utils.IO<SimilarityData>.LoadObject(GetDataFileName("USR"));
                Utils.StopTimer();

                Utils.StartTimer();
                Utils.PrintHeading("Load item-item similarities (rating based)");
                ItemSimilaritiesOfRating = Utils.IO<SimilarityData>.LoadObject(GetDataFileName("ISR"));
                Utils.StopTimer();

                Utils.StartTimer();
                Utils.PrintHeading("Load item-item strong similarity indicators (rating based)");
                StrongSimilarityIndicatorsByItemRating = Utils.IO<HashSet<Tuple<int, int>>>.LoadObject(GetDataFileName("SSIIR"));
                Utils.StopTimer();
            }
            else
            {
                Utils.StartTimer();
                Utils.PrintHeading("Compute user-user similarities (rating based)");
                Metric.GetPearsonOfRows(R_train, MaxCountOfNeighbors, StrongSimilarityThreshold,
                    out UserSimilaritiesOfRating);
                if (saveLoadedData) 
                {
                    Utils.IO<SimilarityData>.SaveObject(UserSimilaritiesOfRating, GetDataFileName("USR"));
                }
                Utils.StopTimer();

                Utils.StartTimer();
                Utils.PrintHeading("Compute item-item similarities (rating based)");
                Metric.GetPearsonOfColumns(R_train, MaxCountOfNeighbors, StrongSimilarityThreshold, 
                    out ItemSimilaritiesOfRating, out StrongSimilarityIndicatorsByItemRating);
                if (saveLoadedData)
                {
                    Utils.IO<SimilarityData>.SaveObject(ItemSimilaritiesOfRating, GetDataFileName("ISR"));
                    Utils.IO<HashSet<Tuple<int,int>>>
                        .SaveObject(StrongSimilarityIndicatorsByItemRating, GetDataFileName("SSIIR"));
                }
                Utils.StopTimer();
            }
            #endregion

            ReadyForNumerical = true;

            return log.ToString();
        }
Example #18
        public static DataMatrix PredictRatings(PrefRelations PR_train,
            DataMatrix R_unknown, int K, SimilarityData neighborsByUser)
        {
            Debug.Assert(PR_train.UserCount == R_unknown.UserCount);
            Debug.Assert(PR_train.ItemCount == R_unknown.ItemCount);

            // This matrix stores predictions
            DataMatrix R_predicted = new DataMatrix(R_unknown.UserCount, R_unknown.ItemCount);

            // This can be considered the R_train of standard UserKNN
            SparseMatrix positionMatrix = PR_train.GetPositionMatrix();
            DataMatrix ratingMatrixFromPositions = new DataMatrix(positionMatrix);

            Vector<double> meanByUser = ratingMatrixFromPositions.GetUserMeans();
            Vector<double> meanByItem = ratingMatrixFromPositions.GetItemMeans();
            double globalMean = ratingMatrixFromPositions.GetGlobalMean();

            // Predict positions for each test user.
            // Single-threaded appears to be fast enough; Parallel.ForEach is unnecessary.
            foreach (Tuple<int, Vector<double>> user in R_unknown.Users)
            {
                int indexOfUser = user.Item1;
                Vector<double> indexesOfUnknownRatings = user.Item2;

                Utils.PrintEpoch("Predicting user/total", indexOfUser, PR_train.UserCount);

                // There are more than K neighbors in the list (sorted by similarity);
                // we will use the top K neighbors WHO HAVE RATED THE ITEM.
                // For example, with 200 top neighbors we hope at least K of them have
                // rated the item. We can't keep every user in the neighbor list
                // because there would be too many for large data sets.
                var topNeighborsOfUser = neighborsByUser[indexOfUser];

                double meanOfUser = meanByUser[indexOfUser];

                // Loop through each position to be predicted
                foreach (Tuple<int, double> unknownRating in indexesOfUnknownRatings.EnumerateIndexed(Zeros.AllowSkip))
                {
                    int indexOfUnknownItem = unknownRating.Item1;

                    // Compute the position of this item for the user
                    // by combining neighbors' positions on this item
                    double weightedSum = 0;
                    double weightSum = 0;
                    int currentTopKCount = 0;
                    foreach (KeyValuePair<int, double> neighbor in topNeighborsOfUser)
                    {
                        int indexOfNeighbor = neighbor.Key;
                        double similarityOfNeighbor = neighbor.Value;
                        double itemPositionOfNeighbor = ratingMatrixFromPositions[indexOfNeighbor, indexOfUnknownItem];

                        // We count only if the neighbor has seen this item before
                        if (itemPositionOfNeighbor != 0)
                        {
                            // Recall that a constant is used to hold position value 0;
                            // revert it back here. The assert condition must be false to
                            // fire; Debug.Assert(true, ...) would never report anything.
                            if (itemPositionOfNeighbor == Config.ZeroInSparseMatrix)
                            {
                                Debug.Assert(false, "By using the PositionShift constant, we should not be in here.");
                                itemPositionOfNeighbor = 0;
                            }
                            weightSum += similarityOfNeighbor;
                            weightedSum += (itemPositionOfNeighbor - meanByUser[indexOfNeighbor]) * similarityOfNeighbor;
                            currentTopKCount++;
                            if (currentTopKCount >= K)
                            {
                                break;
                            }
                        }
                    }

                    // If any neighbor has seen this item
                    if (currentTopKCount != 0)
                    {
                        // The user mean is added back to the mean-centered weighted average
                        R_predicted[indexOfUser, indexOfUnknownItem] = meanOfUser + weightedSum / weightSum;
                    }
                    else
                    {
                        R_predicted[indexOfUser, indexOfUnknownItem] = globalMean;
                    }
                }
            }
            return R_predicted;
        }
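The aggregation above is the classic mean-centered weighted average: prediction = userMean + sum_k(sim_k * centered_k) / sum_k(sim_k). A distilled helper showing only that rule (names are illustrative):

        double Combine(double meanOfUser, IList<double> similarities, IList<double> centeredPositions)
        {
            double weightedSum = 0, weightSum = 0;
            for (int k = 0; k < similarities.Count; k++)
            {
                weightedSum += similarities[k] * centeredPositions[k];
                weightSum += similarities[k];
            }
            // Callers fall back to the global mean when no neighbor qualifies
            return weightSum == 0 ? double.NaN : meanOfUser + weightedSum / weightSum;
        }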
Example #19
        /// <summary>
        /// The user-based KNN collaborative filtering described in paper:
        /// Resnick, P., et al., "GroupLens: an open architecture for collaborative filtering of netnews", 1994.
        /// Link: http://dx.doi.org/10.1145/192844.192905
        /// </summary>
        /// <param name="R_train"></param>
        /// <param name="R_unknown"></param>
        /// <param name="K"></param>
        /// <returns></returns>
        public static DataMatrix PredictRatings(DataMatrix R_train, DataMatrix R_unknown, SimilarityData neighborsByUser, int K)
        {
            // Debug
            Debug.Assert(R_train.UserCount == R_unknown.UserCount);
            Debug.Assert(R_train.ItemCount == R_unknown.ItemCount);
            int cappedCount = 0, globalMeanCount = 0;

            // This matrix stores predictions
            DataMatrix R_predicted = new DataMatrix(R_unknown.UserCount, R_unknown.ItemCount);

            // Basic statistics from train set
            double globalMean = R_train.GetGlobalMean();
            Vector<double> meanByUser = R_train.GetUserMeans();
            Vector<double> meanByItem = R_train.GetItemMeans();

            // Predict ratings for each test user in parallel;
            // a lock guards writes to the shared prediction matrix
            Object lockMe = new Object();

            Parallel.ForEach(R_unknown.Users, user =>
            {
                int indexOfUser             = user.Item1;
                RatingVector userRatings    = new RatingVector(R_train.GetRow(indexOfUser));
                RatingVector unknownRatings = new RatingVector(user.Item2);

                Utils.PrintEpoch("Predicting user/total", indexOfUser, R_train.UserCount);

                // Note that there are more than K neighbors in the list (sorted by similarity);
                // we will use the top-K neighbors WHO HAVE RATED THE ITEM.
                // For example, we may keep 200 top neighbors and hope that at
                // least K of them have rated the item. We cannot keep every
                // neighbor because there would be too many for large data sets
                var topNeighborsOfUser = neighborsByUser[indexOfUser];

                double meanOfUser = meanByUser[indexOfUser];

                // Loop through each rating to be predicted
                foreach (Tuple<int, double> unknownRating in unknownRatings.Ratings)
                {
                    int itemIndex = unknownRating.Item1;
                    double prediction;

                    // TODO: ideally the candidate list should be large enough
                    // that K neighbors who have rated this item can always be
                    // found; otherwise the prediction rests on fewer neighbors

                    // Compute the average rating on this item given by the top-K
                    // neighbors. Each rating is offset by the neighbor's mean
                    // and weighted by the similarity
                    double weightedSum   = 0;
                    double weightSum     = 0;
                    int currentTopKCount = 0;
                    foreach (KeyValuePair<int, double> neighbor in topNeighborsOfUser)
                    {
                        int neighborIndex           = neighbor.Key;
                        double similarityOfNeighbor = neighbor.Value;
                        double itemRatingOfNeighbor = R_train[neighborIndex, itemIndex];

                        // We count only if the neighbor has seen this item before
                        if (itemRatingOfNeighbor != 0)
                        {
                            weightSum   += similarityOfNeighbor;
                            weightedSum += (itemRatingOfNeighbor - meanByUser[neighborIndex]) * similarityOfNeighbor;
                            currentTopKCount++;
                            if (currentTopKCount >= K)
                            {
                                break;  // Stop once we have seen K rating neighbors
                            }
                        }
                    }
                    // If no neighbor has rated this item, it is a cold item
                    // and the global mean is assigned by default
                    if (currentTopKCount != 0)
                    {
                        prediction = meanOfUser + weightedSum / weightSum;
                    }
                    else
                    {
                        prediction = globalMean;
                        globalMeanCount++;
                    }

                    // Cap the ratings
                    if (prediction > Config.Ratings.MaxRating)
                    {
                        cappedCount++;
                        prediction = Config.Ratings.MaxRating;
                    }
                    if (prediction < Config.Ratings.MinRating)
                    {
                        cappedCount++;
                        prediction = Config.Ratings.MinRating;
                    }

                    lock (lockMe)
                    {
                        R_predicted[indexOfUser, itemIndex] = prediction;
                    }
                }
            });
            Utils.PrintValue("# capped predictions", cappedCount.ToString("D"));
            Utils.PrintValue("# default predictions", globalMeanCount.ToString("D"));
            return R_predicted;
        }
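        // The loop above implements the mean-centered, similarity-weighted
        // average from the GroupLens paper. As a sketch in LaTeX notation,
        // with N_K(u,i) denoting the top-K neighbors of user u who have
        // rated item i:
        //
        //   \hat{r}_{u,i} = \bar{r}_u
        //       + \frac{\sum_{v \in N_K(u,i)} \mathrm{sim}(u,v) (r_{v,i} - \bar{r}_v)}
        //              {\sum_{v \in N_K(u,i)} \mathrm{sim}(u,v)}
        //
        // Cold items (no rated neighbor) fall back to the global mean, and
        // every prediction is capped to the valid rating range afterwards.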
Example #20
        /// <summary>
        /// Compute the Mean Absolute Error (MAE): the sum of absolute differences
        /// between the two matrices, divided by the number of known ratings.
        /// </summary>
        public static double Evaluate(DataMatrix correctMatrix, DataMatrix predictedMatrix)
        {
            return (correctMatrix.Matrix - predictedMatrix.Matrix)
                .ColumnAbsoluteSums().Sum() / correctMatrix.NonZerosCount;
        }
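        // A usage sketch for the evaluator above. Note that a prediction
        // stored at a position that is zero in correctMatrix still contributes
        // to the numerator, so predictedMatrix should only hold entries at
        // known positions. The call below assumes the GetSampleRatingMatrix
        // helper shown earlier in this document.
        DataMatrix truth = GetSampleRatingMatrix();
        DataMatrix predicted = GetSampleRatingMatrix(); // identical copy
        double mae = Evaluate(truth, predicted);
        Console.WriteLine(mae); // prints 0 for identical matrices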
Example #21
        /// <summary>
        /// Recommend the most popular (measured by mean rating) items to all users.
        /// </summary>
        public string RunMostPopular(int topN)
        {
            if (!ReadyForNumerical) { GetReadyForNumerical(); }
            StringBuilder log = new StringBuilder();
            log.AppendLine(Utils.PrintHeading("Most popular"));

            // Prediction
            Utils.StartTimer();
            var meanByItem = R_train.GetItemMeans();
            DataMatrix R_predicted = new DataMatrix(R_unknown.UserCount, R_unknown.ItemCount);
            foreach (var element in R_unknown.Matrix.EnumerateIndexed(Zeros.AllowSkip))
            {
                int indexOfUser = element.Item1;
                int indexOfItem = element.Item2;
                R_predicted[indexOfUser, indexOfItem] = meanByItem[indexOfItem];
            }
            var topNItemsByUser = ItemRecommendationCore.GetTopNItemsByUser(R_predicted, topN);
            log.AppendLine(Utils.StopTimer());

            // TopN Evaluation
            for (int n = 1; n <= topN; n++)
            {
                log.AppendLine(Utils.PrintValue("NCDG@" + n, NCDG.Evaluate(RelevantItemsByUser, topNItemsByUser, n).ToString("0.0000")));
            }
            for (int n = 1; n <= topN; n++)
            {
                log.AppendLine(Utils.PrintValue("MAP@" + n, MAP.Evaluate(RelevantItemsByUser, topNItemsByUser, n).ToString("0.0000")));
            }

            return log.ToString();
        }
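        // Both evaluators report standard top-N ranking metrics. As a sketch
        // of the usual definitions (assuming NCDG.Evaluate and MAP.Evaluate
        // follow them), with rel_k = 1 if the k-th recommended item is in the
        // user's relevant set and 0 otherwise:
        //
        //   DCG@n  = \sum_{k=1}^{n} rel_k / \log_2(k + 1)
        //   NDCG@n = DCG@n / IDCG@n      (IDCG@n: DCG@n of a perfect ranking)
        //
        //   AP@n   = \frac{1}{\min(m, n)} \sum_{k=1}^{n} P(k) rel_k
        //   MAP@n  = mean of AP@n over all users
        //
        // where P(k) is the precision of the first k recommendations and m is
        // the number of relevant items of the user.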
Example #22
        public string RunPrefMRF(double regularization, double learnRate, int maxEpoch, List<double> quantizer,
            int topN = 10)
        {
            // Load OMFDistribution from file
            Dictionary<Tuple<int, int>, List<double>> OMFDistributionByUserItem;
            if (File.Exists(GetDataFileName("PrefOMF_")))
            {
                OMFDistributionByUserItem = Utils.IO<Dictionary<Tuple<int, int>, List<double>>>.LoadObject(GetDataFileName("PrefOMF_"));
            }
            else
            {
                return "Abort, Run OMF first.";
            }

            if (!ReadyForOrdinal) { GetReadyForOrdinal(); }
            StringBuilder log = new StringBuilder();
            log.AppendLine(Utils.PrintHeading("PrefMRF: PrefNMF based ORF"));

            // Prediction
            Utils.StartTimer();
            DataMatrix R_predicted_expectations;
            DataMatrix R_predicted_mostlikely;

            // Convert PR_train into user-wise preferences
            DataMatrix R_train_positions = new DataMatrix(PR_train.GetPositionMatrix());
            R_train_positions.Quantization(quantizer[0], quantizer[quantizer.Count - 1] - quantizer[0], quantizer);

            ORF orf = new ORF();
            orf.PredictRatings( R_train_positions, R_unknown, StrongSimilarityIndicatorsByItemPref,
                OMFDistributionByUserItem, regularization, learnRate, maxEpoch, 
                quantizer.Count, out R_predicted_expectations, out R_predicted_mostlikely);
          
            log.AppendLine(Utils.StopTimer());

            // Evaluation
            var topNItemsByUser_expectations = ItemRecommendationCore.GetTopNItemsByUser(R_predicted_expectations, topN);
            for (int n = 1; n <= topN; n++)
            {
                log.AppendLine(Utils.PrintValue("NCDG@" + n, NCDG.Evaluate(RelevantItemsByUser,
                    topNItemsByUser_expectations, n).ToString("0.0000")));
            }
            for (int n = 1; n <= topN; n++)
            {
                log.AppendLine(Utils.PrintValue("MAP@" + n, MAP.Evaluate(RelevantItemsByUser, topNItemsByUser_expectations, n).ToString("0.0000")));
            }

            return log.ToString();
        }
Example #24
        /// <summary>
        /// Ordinal Matrix Factorization.
        /// </summary>
        /// <param name="R_train">The matrix containing the training ratings</param>
        /// <param name="R_unknown">The matrix containing ones indicating unknown ratings</param>
        /// <param name="R_scorer">The matrix containing ratings predicted by the scorer on
        /// both the R_train and R_unknown sets</param>
        /// <param name="quantizer">The ordered rating values, one per ordinal interval</param>
        /// <param name="R_predicted">Outputs the predicted ratings on R_unknown</param>
        /// <param name="OMFDistributionByUserItem">Outputs the ordinal rating distribution of each user-item pair</param>
        /// <returns>A log of the learning process</returns>
        #region PredictRatings
        public static string PredictRatings(SparseMatrix R_train, SparseMatrix R_unknown,
            SparseMatrix R_scorer, List<double> quantizer, out DataMatrix R_predicted,
            out Dictionary<Tuple<int, int>, List<double>> OMFDistributionByUserItem)
        {
            StringBuilder log = new StringBuilder();
            /************************************************************
             *   Parameterization and Initialization
            ************************************************************/
            #region Parameterization and Initialization
            // This matrix stores predictions
            SparseMatrix R_predicted_out = (SparseMatrix)Matrix.Build.Sparse(R_unknown.RowCount, R_unknown.ColumnCount);
            Dictionary<Tuple<int, int>, List<double>> OMFDistributionByUserItem_out =
                new Dictionary<Tuple<int, int>, List<double>>(R_unknown.NonZerosCount);

            // User specified parameters
            double maxEpoch = Config.OMF.MaxEpoch;
            double learnRate = Config.OMF.LearnRate;
            double regularization = Config.OMF.Regularization;
            int intervalCount = quantizer.Count;
            int userCount = R_train.RowCount;

            // Parameters for each user
            Dictionary<int, ParametersOfUser> parametersByUser = new Dictionary<int, ParametersOfUser>(R_train.RowCount);

            // Compute initial values of t1 and betas
            // that will be used for all users, Eq. 5
            double t1_initial = (quantizer[0] + quantizer[1]) / 2;
            Vector<double> betas_initial = Vector.Build.Dense(quantizer.Count - 2);
            double t_r = t1_initial;    // tracks the previous threshold across iterations
            for (int i = 1; i <= betas_initial.Count; i++)
            {
                double t_r_plus_1 = (quantizer[i] + quantizer[i + 1]) * 0.5;
                betas_initial[i - 1] = Math.Log(t_r_plus_1 - t_r); // natural log
                t_r = t_r_plus_1;
            }

            // Initialize parameters (t1, betas) for each user
            for (int indexOfUser = 0; indexOfUser < R_train.RowCount; indexOfUser++)
            {
                parametersByUser[indexOfUser] = new ParametersOfUser(t1_initial, betas_initial);
            }
            #endregion

            /************************************************************
             *   Learn parameters from training data R_train and R_scorer
            ************************************************************/
            #region Learn parameters from training data R_train and R_scorer
            // Learn parameters for each user; note that each user has their own model
            Object lockMe = new Object();
            Parallel.ForEach(R_train.EnumerateRowsIndexed(), row =>
            {
                int indexOfUser = row.Item1;
                SparseVector ratingsOfUser = (SparseVector)row.Item2;

                // Store this user's ratings from R_train and the corresponding ratings from the scorer
                List<double> ratingsFromScorer = new List<double>(ratingsOfUser.NonZerosCount);
                List<double> ratingsFromRTrain = new List<double>(ratingsOfUser.NonZerosCount);
                foreach (var element in ratingsOfUser.EnumerateIndexed(Zeros.AllowSkip))
                {
                    int indexOfItem = element.Item1;
                    double rating = element.Item2;
                    // Ratings need to be added in the same order
                    ratingsFromScorer.Add(R_scorer[indexOfUser, indexOfItem]);
                    ratingsFromRTrain.Add(rating);
                }

                Debug.Assert(ratingsFromScorer.Count == ratingsOfUser.NonZerosCount);
                Debug.Assert(ratingsFromRTrain.Count == ratingsOfUser.NonZerosCount);

                // Parameters for the current user are estimated by
                // maximizing the log likelihood (Eq. 21) using stochastic gradient ascent
                // Eq. 22
                double t1 = parametersByUser[indexOfUser].t1;
                Vector<double> betas = parametersByUser[indexOfUser].betas;
                for (int epoch = 0; epoch < maxEpoch; epoch++)
                {
                    for (int i = 0; i < ratingsFromRTrain.Count; i++)
                    {
                        double ratingFromRTrain = ratingsFromRTrain[i];
                        double ratingFromScorer = ratingsFromScorer[i];

                        int r = quantizer.IndexOf(ratingFromRTrain);    // r is the interval that the rating falls into
                        double probLE_r = ComputeProbLE(ratingFromScorer, r, t1, betas);   // Eq. 9
                        double probLE_r_minus_1 = ComputeProbLE(ratingFromScorer, r - 1, t1, betas);
                        double probE_r = probLE_r - probLE_r_minus_1;    // Eq. 10

                        // Compute derivatives/gradients
                        double derivativeOft1 = learnRate / probE_r * (probLE_r * (1 - probLE_r) * DerivativeOfBeta(r, 0, t1)
                                - probLE_r_minus_1 * (1 - probLE_r_minus_1) * DerivativeOfBeta(r - 1, 0, t1)
                                - regularization * t1);

                        Vector<double> derivativesOfbetas = Vector.Build.Dense(betas.Count);
                        for (int k = 0; k < betas.Count; k++)
                        {
                            derivativesOfbetas[k] = learnRate / probE_r * (probLE_r * (1 - probLE_r) *
                                    DerivativeOfBeta(r, k + 1, betas[k]) - probLE_r_minus_1 * (1 - probLE_r_minus_1) *
                                    DerivativeOfBeta(r - 1, k + 1, betas[k]) - regularization * betas[k]);
                        }

                        // Update parameters
                        t1 += derivativeOft1;
                        betas += derivativesOfbetas;
                    }
                }

                // Store the learned parameters
                lock (lockMe)
                {
                    parametersByUser[indexOfUser].t1 = t1;
                    parametersByUser[indexOfUser].betas = betas;
                }

                log.AppendLine(Utils.PrintEpoch("user/total", indexOfUser, userCount, "Learned params",
                    String.Format("t1={0:0.000},betas={1}", t1, string.Concat(
                    betas.Select(i => string.Format("{0:0.00},", i))))));
            });
            #endregion

            /************************************************************
             *   Make predictions using learned parameters
            ************************************************************/
            #region Make predictions using learned parameters
            lockMe = new Object();
            Parallel.ForEach(R_unknown.EnumerateIndexed(Zeros.AllowSkip), element =>
            {
                int indexOfUser = element.Item1;
                int indexOfItem = element.Item2;
                // This is the ordinal distribution of the current user
                // given the internal score by MF
                // e.g. what is the probability of each rating 1-5
                List<double> probabilitiesByInterval = new List<double>(quantizer.Count);
                double scoreFromScorer = R_scorer[indexOfUser, indexOfItem];
                double pre = ComputeProbLE(scoreFromScorer, 0, parametersByUser[indexOfUser].t1, parametersByUser[indexOfUser].betas);
                probabilitiesByInterval.Add(pre);
                for (int i = 1; i < intervalCount; i++)
                {
                    double pro = ComputeProbLE(scoreFromScorer, i, parametersByUser[indexOfUser].t1, parametersByUser[indexOfUser].betas);
                    probabilitiesByInterval.Add(pro - pre);
                    pre = pro;
                }

                // Compute the smoothed expectation for the RMSE metric:
                // each interval's rating value weighted by its probability
                double expectationRating = 0.0;
                for (int i = 0; i < probabilitiesByInterval.Count; i++)
                {
                    expectationRating += quantizer[i] * probabilitiesByInterval[i];
                }

                // TODO: Compute most likely value for MAE metric

                lock (lockMe)
                {
                    // Keep OMF Distribution
                    OMFDistributionByUserItem_out[new Tuple<int, int>(indexOfUser, indexOfItem)] = probabilitiesByInterval;
                    // Keep the numerical prediction
                    R_predicted_out[indexOfUser, indexOfItem] = expectationRating;
                }
            });
            #endregion


            /************************************************************
             *   Generate OMF distributions for R_train as well
            ************************************************************/
            #region Generate OMF distributions for R_train as well
            lockMe = new Object();
            Parallel.ForEach(R_train.EnumerateIndexed(Zeros.AllowSkip), element =>
            {
                int indexOfUser = element.Item1;
                int indexOfItem = element.Item2;
                // This is the ordinal distribution of the current user
                // given the internal score by MF
                // e.g. what is the probability of each rating 1-5
                List<double> probabilitiesByInterval = new List<double>(quantizer.Count);
                double scoreFromScorer = R_scorer[indexOfUser, indexOfItem];
                double pre = ComputeProbLE(scoreFromScorer, 0, parametersByUser[indexOfUser].t1, parametersByUser[indexOfUser].betas);
                probabilitiesByInterval.Add(pre);
                for (int i = 1; i < intervalCount; i++)
                {
                    double pro = ComputeProbLE(scoreFromScorer, i, parametersByUser[indexOfUser].t1, parametersByUser[indexOfUser].betas);
                    probabilitiesByInterval.Add(pro - pre);
                    pre = pro;
                }

                lock (lockMe)
                {
                    // Keep OMF Distribution
                    OMFDistributionByUserItem_out[new Tuple<int, int>(indexOfUser, indexOfItem)] = probabilitiesByInterval;
                }
            });
            #endregion

            R_predicted = new DataMatrix(R_predicted_out);
            OMFDistributionByUserItem = OMFDistributionByUserItem_out;

            return log.ToString();
        }
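        // A sketch of the model the code above fits, assuming ComputeProbLE
        // follows the usual ordinal construction with the logistic function
        // \sigma(x) = 1 / (1 + e^{-x}). Each user owns thresholds derived
        // from t1 and betas:
        //
        //   t_{r+1} = t_r + e^{\beta_r}
        //   P(rating <= r | score y) = \sigma(t_r - y)                          // Eq. 9
        //   P(rating  = r | score y) = \sigma(t_r - y) - \sigma(t_{r-1} - y)    // Eq. 10
        //
        // The exponential step keeps the learned thresholds strictly increasing
        // without constrained optimization, and the reported expectation is the
        // probability-weighted sum of the quantizer's rating values.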