// Get the relevant items of each user, i.e. rated no lower than the criteria
        public static Dictionary<int, List<int>> GetRelevantItemsByUser(DataMatrix R, double criteria)
        {
            int userCount = R.UserCount;
            int itemCount = R.ItemCount;
            Dictionary<int, List<int>> relevantItemsByUser = new Dictionary<int, List<int>>(userCount);

            // Select relevant items for each user
            foreach (Tuple<int, Vector<double>> user in R.Users)
            {
                int userIndex = user.Item1;
                RatingVector userRatings = new RatingVector(user.Item2);
                List<int> relevantItems = new List<int>();

                foreach (Tuple<int, double> element in userRatings.Ratings)
                {
                    int itemIndex = element.Item1;
                    double rating = element.Item2;
                    if (rating >= criteria)
                    {
                        // This is a relevant item
                        relevantItems.Add(itemIndex);
                    }
                }
                relevantItemsByUser[userIndex] = relevantItems;
            }

            return relevantItemsByUser;
        }
Beispiel #2
0
        /// <summary>
        /// The user-based KNN collaborative filtering described in paper: 
        /// Resnick, P., et al., "GroupLens: an open architecture for collaborative filtering of netnews", 1994.
        /// Link: http://dx.doi.org/10.1145/192844.192905
        /// </summary>
        /// <param name="R_train"></param>
        /// <param name="R_unknown"></param>
        /// <param name="K"></param>
        /// <returns></returns>
        public static DataMatrix PredictRatings(DataMatrix R_train, DataMatrix R_unknown, SimilarityData neighborsByUser, int K)
        {
            // Debug
            Debug.Assert(R_train.UserCount == R_unknown.UserCount);
            Debug.Assert(R_train.ItemCount == R_unknown.ItemCount);
            int cappedCount = 0, globalMeanCount = 0;

            // This matrix stores predictions
            DataMatrix R_predicted = new DataMatrix(R_unknown.UserCount, R_unknown.ItemCount);

            // Basic statistics from train set
            double globalMean = R_train.GetGlobalMean();
            Vector<double> meanByUser = R_train.GetUserMeans();
            Vector<double> meanByItem = R_train.GetItemMeans();

            // Predict ratings for each test user
            // Single thread appears to be very fast, parallel.foreach is unnecessary
            Object lockMe = new Object();
            Parallel.ForEach(R_unknown.Users, user =>
            {
                int indexOfUser = user.Item1;
                RatingVector userRatings = new RatingVector(R_train.GetRow(indexOfUser));
                RatingVector unknownRatings = new RatingVector(user.Item2);

                Utils.PrintEpoch("Predicting user/total", indexOfUser, R_train.UserCount);

                // Note that there are more than K neighbors in the list (sorted by similarity)
                // we will use the top-K neighbors WHO HAVE RATED THE ITEM
                // For example we have 200 top neighbors, and we hope there are
                // K neighbors in the list have rated the item. We can't keep
                // everyone in the neighbor list because there are too many for large data sets
                var topNeighborsOfUser = neighborsByUser[indexOfUser];
                //Dictionary<int, double> topKNeighbors = KNNCore.GetTopKNeighborsByUser(userSimilarities, indexOfUser, K);

                double meanOfUser = meanByUser[indexOfUser];

                // Loop through each ratingto be predicted
                foreach (Tuple<int, double> unknownRating in unknownRatings.Ratings)
                {
                    int itemIndex = unknownRating.Item1;
                    double prediction;

                    // TODO: we actually should use the Top-K neighbors
                    // that have rated this item, otherwise we may have
                    // only a few neighbors rated this item

                    // Compute the average rating on item iid given 
                    // by the top K neighbors. Each rating is offsetted by
                    // the neighbor's average and weighted by the similarity
                    double weightedSum = 0;
                    double weightSum = 0;
                    int currentTopKCount = 0;
                    foreach (KeyValuePair<int, double> neighbor in topNeighborsOfUser)
                    {
                        int neighborIndex = neighbor.Key;
                        double similarityOfNeighbor = neighbor.Value;
                        double itemRatingOfNeighbor = R_train[neighborIndex, itemIndex];

                        // We count only if the neighbor has seen this item before
                        if (itemRatingOfNeighbor != 0)
                        {
                            weightSum += similarityOfNeighbor;
                            weightedSum += (itemRatingOfNeighbor - meanByUser[neighborIndex]) * similarityOfNeighbor;
                            currentTopKCount++;
                            if (currentTopKCount >= K) { break; }   // Stop when we have seen K neighbors
                        }
                    }
                    // A zero weightedSum means this is a cold item and global mean will be assigned by default
                    if (weightedSum != 0)
                    {
                        prediction = meanOfUser + weightedSum / weightSum;
                    }
                    else
                    {
                        prediction = globalMean;
                        globalMeanCount++;
                    }

                    // Cap the ratings
                    if (prediction > Config.Ratings.MaxRating)
                    {
                        cappedCount++;
                        prediction = Config.Ratings.MaxRating;
                    }
                    if (prediction < Config.Ratings.MinRating)
                    {
                        cappedCount++;
                        prediction = Config.Ratings.MinRating;
                    }

                    lock (lockMe)
                    {
                        R_predicted[indexOfUser, itemIndex] = prediction;
                    }
                }
            });
            Utils.PrintValue("# capped predictions", cappedCount.ToString("D"));
            Utils.PrintValue("# default predictions", globalMeanCount.ToString("D"));
            return R_predicted;
        }
Beispiel #3
0
        // TODO: Scalar preference relations based on Bradley-Terry model
        public static PrefRelations CreateScalar(DataMatrix R)
        {
            int userCount = R.UserCount;
            int itemCount = R.ItemCount;
            PrefRelations PR = new PrefRelations(itemCount);

            // Create a preference matrix for each user
            Object lockMe = new Object();
            Parallel.ForEach(R.Users, user =>
            {
                int userIndex = user.Item1;
                RatingVector userRatings = new RatingVector(user.Item2);

                Utils.PrintEpoch("Doing user/total", userIndex, userCount);

                // The diagonal refer to the i-i item pair
                SparseMatrix userPreferences = new SparseMatrix(itemCount);

                // The diagonal is left empty!
                //SparseMatrix.OfMatrix(Matrix.Build.SparseDiagonal(itemCount, Config.Preferences.EquallyPreferred));

                // TODO: Use Vector.Map2 to replace the following two foreach loops

                // Here we need to compare each pair of items rated by this user
                foreach (Tuple<int, double> left in userRatings.Ratings)
                {
                    int leftItemIndex = left.Item1;
                    double leftItemRating = left.Item2;

                    foreach (Tuple<int, double> right in userRatings.Ratings)
                    {
                        int rightItemIndex = right.Item1;

                        // TODO: We could compute only the lower triangular, 
                        // and uppwer will be a negative mirror
                        // Let's do it directly at this stage
                        double rightItemRating = right.Item2;

                        Debug.Assert(rightItemRating != 0 && leftItemRating != 0);

                        // Skip the diagonal
                        if (leftItemIndex == rightItemIndex) { continue; }

                        userPreferences[leftItemIndex, rightItemIndex] = 0.1 * (leftItemRating - rightItemRating + 5);//(double)leftItemRating / (leftItemRating + rightItemRating);
                    }
                }

                // Because pr's upper triangular should be a mirror of the lower triangular
                Debug.Assert((userPreferences.NonZerosCount).IsEven());
                double debug1 = (Math.Pow(((SparseVector)R.GetRow(userIndex)).NonZerosCount, 2)
                    - ((SparseVector)R.GetRow(userIndex)).NonZerosCount);
                double debug2 = userPreferences.NonZerosCount;
                Debug.Assert(debug1 == debug2);

                lock (lockMe)
                {
                    // Copy similarity values from lower triangular to upper triangular
                    //pr_uid = DenseMatrix.OfMatrix(pr_uid + pr_uid.Transpose() - DenseMatrix.CreateIdentity(pr_uid.RowCount));
                    PR[userIndex] = userPreferences;
                }
            });



            return PR;
        }
Beispiel #4
0
        /// <summary>
        /// The user-based KNN collaborative filtering described in paper:
        /// Resnick, P., et al., "GroupLens: an open architecture for collaborative filtering of netnews", 1994.
        /// Link: http://dx.doi.org/10.1145/192844.192905
        /// </summary>
        /// <param name="R_train"></param>
        /// <param name="R_unknown"></param>
        /// <param name="K"></param>
        /// <returns></returns>
        public static DataMatrix PredictRatings(DataMatrix R_train, DataMatrix R_unknown, SimilarityData neighborsByUser, int K)
        {
            // Debug
            Debug.Assert(R_train.UserCount == R_unknown.UserCount);
            Debug.Assert(R_train.ItemCount == R_unknown.ItemCount);
            int cappedCount = 0, globalMeanCount = 0;

            // This matrix stores predictions
            DataMatrix R_predicted = new DataMatrix(R_unknown.UserCount, R_unknown.ItemCount);

            // Basic statistics from train set
            double          globalMean = R_train.GetGlobalMean();
            Vector <double> meanByUser = R_train.GetUserMeans();
            Vector <double> meanByItem = R_train.GetItemMeans();

            // Predict ratings for each test user
            // Single thread appears to be very fast, parallel.foreach is unnecessary
            Object lockMe = new Object();

            Parallel.ForEach(R_unknown.Users, user =>
            {
                int indexOfUser             = user.Item1;
                RatingVector userRatings    = new RatingVector(R_train.GetRow(indexOfUser));
                RatingVector unknownRatings = new RatingVector(user.Item2);

                Utils.PrintEpoch("Predicting user/total", indexOfUser, R_train.UserCount);

                // Note that there are more than K neighbors in the list (sorted by similarity)
                // we will use the top-K neighbors WHO HAVE RATED THE ITEM
                // For example we have 200 top neighbors, and we hope there are
                // K neighbors in the list have rated the item. We can't keep
                // everyone in the neighbor list because there are too many for large data sets
                var topNeighborsOfUser = neighborsByUser[indexOfUser];
                //Dictionary<int, double> topKNeighbors = KNNCore.GetTopKNeighborsByUser(userSimilarities, indexOfUser, K);

                double meanOfUser = meanByUser[indexOfUser];

                // Loop through each ratingto be predicted
                foreach (Tuple <int, double> unknownRating in unknownRatings.Ratings)
                {
                    int itemIndex = unknownRating.Item1;
                    double prediction;

                    // TODO: we actually should use the Top-K neighbors
                    // that have rated this item, otherwise we may have
                    // only a few neighbors rated this item

                    // Compute the average rating on item iid given
                    // by the top K neighbors. Each rating is offsetted by
                    // the neighbor's average and weighted by the similarity
                    double weightedSum   = 0;
                    double weightSum     = 0;
                    int currentTopKCount = 0;
                    foreach (KeyValuePair <int, double> neighbor in topNeighborsOfUser)
                    {
                        int neighborIndex           = neighbor.Key;
                        double similarityOfNeighbor = neighbor.Value;
                        double itemRatingOfNeighbor = R_train[neighborIndex, itemIndex];

                        // We count only if the neighbor has seen this item before
                        if (itemRatingOfNeighbor != 0)
                        {
                            weightSum   += similarityOfNeighbor;
                            weightedSum += (itemRatingOfNeighbor - meanByUser[neighborIndex]) * similarityOfNeighbor;
                            currentTopKCount++;
                            if (currentTopKCount >= K)
                            {
                                break;
                            }                                       // Stop when we have seen K neighbors
                        }
                    }
                    // A zero weightedSum means this is a cold item and global mean will be assigned by default
                    if (weightedSum != 0)
                    {
                        prediction = meanOfUser + weightedSum / weightSum;
                    }
                    else
                    {
                        prediction = globalMean;
                        globalMeanCount++;
                    }

                    // Cap the ratings
                    if (prediction > Config.Ratings.MaxRating)
                    {
                        cappedCount++;
                        prediction = Config.Ratings.MaxRating;
                    }
                    if (prediction < Config.Ratings.MinRating)
                    {
                        cappedCount++;
                        prediction = Config.Ratings.MinRating;
                    }

                    lock (lockMe)
                    {
                        R_predicted[indexOfUser, itemIndex] = prediction;
                    }
                }
            });
            Utils.PrintValue("# capped predictions", cappedCount.ToString("D"));
            Utils.PrintValue("# default predictions", globalMeanCount.ToString("D"));
            return(R_predicted);
        }