Example #1
     public static void GetCosineOfPrefRelations(PrefRelations PR, int maxCountOfNeighbors,
                     double strongSimilarityThreshold, out SimilarityData neighborsByObject)
     {
         HashSet<Tuple<int, int>> unusedStrongSimilarityIndicators; // This caller only needs the neighbor data
         ComputeSimilarities(PR, SimilarityMetric.CosinePrefRelations, maxCountOfNeighbors,
             strongSimilarityThreshold, out neighborsByObject, out unusedStrongSimilarityIndicators);
     }
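A minimal call sketch (the numeric arguments are hypothetical placeholders; Example #7 below shows the same call driven by configuration fields):

        SimilarityData userSimilarities;
        Metric.GetCosineOfPrefRelations(PR_train, 200 /* maxCountOfNeighbors */,
            0.3 /* strongSimilarityThreshold */, out userSimilarities);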
Example #2
        public static DataMatrix PredictRatings(PrefRelations PR_train, DataMatrix R_unknown,
                                                int maxEpoch, double learnRate, double regularizationOfUser, double regularizationOfItem, int factorCount)
        {
            // Latent features
            List <Vector <double> > P;
            List <Vector <double> > Q;

            LearnLatentFeatures(PR_train, maxEpoch, learnRate, regularizationOfUser, regularizationOfItem, factorCount, out P, out Q);

            List <Tuple <int, int, double> > R_predicted_cache = new List <Tuple <int, int, double> >();

            foreach (var data in R_unknown.Matrix.EnumerateIndexed(Zeros.AllowSkip))
            {
                int indexOfUser = data.Item1;
                int indexOfItem = data.Item2;
                R_predicted_cache.Add(new Tuple <int, int, double>(indexOfUser, indexOfItem, P[indexOfUser].DotProduct(Q[indexOfItem])));
            }

            DataMatrix R_predicted = new DataMatrix(SparseMatrix.OfIndexed(R_unknown.UserCount, R_unknown.ItemCount, R_predicted_cache));

            //new DataMatrix(R_unknown.Matrix.PointwiseMultiply(P.Multiply(Q)));
            // TODO: should we normalize the result into [0,1]? It seems zero entries would also be converted into 0.5!
            //R_predicted.Matrix.MapInplace(x => RecSys.Core.SpecialFunctions.InverseLogit(x), Zeros.AllowSkip);
            return(R_predicted);
        }
Example #3
        public static PrefRelations PredictPrefRelations(PrefRelations PR_train, SparseMatrix PR_unknown,
                                                         int maxEpoch, double learnRate, double regularizationOfUser, double regularizationOfItem, int factorCount)
        {
            // Latent features
            List <Vector <double> > P;
            List <Vector <double> > Q;

            //Matrix<double> P;
            //Matrix<double> Q;

            LearnLatentFeatures(PR_train, maxEpoch, learnRate, regularizationOfUser, regularizationOfItem, factorCount, out P, out Q);

            PrefRelations PR_predicted = new PrefRelations(PR_train.ItemCount);

            Object lockMe = new Object();

            Parallel.ForEach(PR_unknown.EnumerateRowsIndexed(), user =>
            {
                int indexOfUser = user.Item1;
                Vector <double> unknownPreferencesOfUser = user.Item2;
                SparseMatrix predictedPreferencesOfUser  = new SparseMatrix(PR_train.ItemCount, PR_train.ItemCount);

                // Predict each unknown preference
                foreach (var unknownPreference in unknownPreferencesOfUser.EnumerateIndexed(Zeros.AllowSkip))
                {
                    int indexOfItem_i              = unknownPreference.Item1;
                    // Note: the entry value stored in PR_unknown appears to encode the index of item j of the (i, j) pair
                    int indexOfItem_j              = (int)unknownPreference.Item2;
                    double estimate_uij            = P[indexOfUser].DotProduct(Q[indexOfItem_i] - Q[indexOfItem_j]); // Eq. 2
                    double normalized_estimate_uij = Core.SpecialFunctions.InverseLogit(estimate_uij);               // pi_uij in paper
                    predictedPreferencesOfUser[indexOfItem_i, indexOfItem_j] = normalized_estimate_uij;
                }

                lock (lockMe)
                {
                    PR_predicted[indexOfUser] = predictedPreferencesOfUser;
                }
            });

            return(PR_predicted);
        }
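Both this example and Example #2 rely on Core.SpecialFunctions.InverseLogit to map the raw pairwise score P[u] · (Q[i] - Q[j]) into [0,1]. Its implementation is not shown in these examples; a minimal sketch, assuming it is the standard logistic function:

        public static double InverseLogit(double x)
        {
            // Standard logistic function: maps any real x into (0, 1).
            // This is the pi_uij transform the comments refer to as "Eq. 2".
            return 1.0 / (1.0 + Math.Exp(-x));
        }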
Example #4
        public static DataMatrix PredictRatings(PrefRelations PR_train, DataMatrix R_unknown,
           int maxEpoch, double learnRate, double regularizationOfUser, double regularizationOfItem, int factorCount)
        {
            // Latent features
            List<Vector<double>> P;
            List<Vector<double>> Q;

            LearnLatentFeatures(PR_train, maxEpoch, learnRate, regularizationOfUser, regularizationOfItem, factorCount, out P, out Q);

            List<Tuple<int, int, double>> R_predicted_cache = new List<Tuple<int, int, double>>();
            foreach(var data in R_unknown.Matrix.EnumerateIndexed(Zeros.AllowSkip))
            {
                int indexOfUser = data.Item1;
                int indexOfItem = data.Item2;
                R_predicted_cache.Add(new Tuple<int, int, double>(indexOfUser, indexOfItem, P[indexOfUser].DotProduct(Q[indexOfItem])));
            }

            DataMatrix R_predicted = new DataMatrix(SparseMatrix.OfIndexed(R_unknown.UserCount,R_unknown.ItemCount,R_predicted_cache));
                //new DataMatrix(R_unknown.Matrix.PointwiseMultiply(P.Multiply(Q)));
            // TODO: should we normalize the result into [0,1]? It seems zero entries would also be converted into 0.5!
            //R_predicted.Matrix.MapInplace(x => RecSys.Core.SpecialFunctions.InverseLogit(x), Zeros.AllowSkip);
            return R_predicted;
        }
Example #5
        /// <summary>
        /// Compute pairwise similarities, switching between different metrics.
        /// </summary>
        /// <param name="PR">Preference relations of all users.</param>
        /// <param name="similarityMetric">The similarity metric to switch on.</param>
        /// <param name="maxCountOfNeighbors">Maximum number of neighbors to keep per object.</param>
        /// <param name="minSimilarityThreshold">Similarities above this value are recorded as strong indicators.</param>
        /// <param name="neighborsByObject">The computed similarity data, indexed by object.</param>
        /// <param name="strongSimilarityIndicators">Pairs whose similarity exceeds the threshold.</param>
        private static void ComputeSimilarities(PrefRelations PR,
            Metric.SimilarityMetric similarityMetric, int maxCountOfNeighbors,
            double minSimilarityThreshold, out SimilarityData neighborsByObject,
            out HashSet<Tuple<int, int>> strongSimilarityIndicators)
        {
            int dimension = PR.UserCount;
            HashSet<Tuple<int, int>> strongSimilarityIndicators_out = new HashSet<Tuple<int, int>>();
            SimilarityData neighborsByObject_out = new SimilarityData(maxCountOfNeighbors);

            // Compute similarity for the lower triangular
            Object lockMe = new Object();
            Parallel.For(0, dimension, i =>
            {
                Utils.PrintEpoch("Progress current/total", i, dimension);

                for (int j = 0; j < dimension; j++)
                {
                    if (i == j) { continue; } // Skip self similarity

                    else if (i > j)
                    {
                        switch (similarityMetric)
                        {
                            case SimilarityMetric.CosinePrefRelations:
                                double cosinePR = Metric.cosinePR(PR, i, j);
                                lock (lockMe)
                                {
                                    if (cosinePR > minSimilarityThreshold)
                                    {
                                        strongSimilarityIndicators_out.Add(new Tuple<int, int>(i, j));
                                    }
                                    neighborsByObject_out.AddSimilarityData(i, j, cosinePR);
                                    neighborsByObject_out.AddSimilarityData(j, i, cosinePR);
                                }
                                break;
                            // More metrics to be added here.
                        }
                    }
                }
            });

            neighborsByObject = neighborsByObject_out;
            strongSimilarityIndicators = strongSimilarityIndicators_out;
        }
Example #6
        // TODO: Scalar preference relations based on Bradley-Terry model
        public static PrefRelations CreateScalar(DataMatrix R)
        {
            int userCount = R.UserCount;
            int itemCount = R.ItemCount;
            PrefRelations PR = new PrefRelations(itemCount);

            // Create a preference matrix for each user
            Object lockMe = new Object();
            Parallel.ForEach(R.Users, user =>
            {
                int userIndex = user.Item1;
                RatingVector userRatings = new RatingVector(user.Item2);

                Utils.PrintEpoch("Doing user/total", userIndex, userCount);

                // The diagonal refers to the i-i item pair
                SparseMatrix userPreferences = new SparseMatrix(itemCount);

                // The diagonal is left empty!
                //SparseMatrix.OfMatrix(Matrix.Build.SparseDiagonal(itemCount, Config.Preferences.EquallyPreferred));

                // TODO: Use Vector.Map2 to replace the following two foreach loops

                // Here we need to compare each pair of items rated by this user
                foreach (Tuple<int, double> left in userRatings.Ratings)
                {
                    int leftItemIndex = left.Item1;
                    double leftItemRating = left.Item2;

                    foreach (Tuple<int, double> right in userRatings.Ratings)
                    {
                        int rightItemIndex = right.Item1;

                        // TODO: We could compute only the lower triangular,
                        // and the upper will be a negative mirror.
                        // Let's do it directly at this stage.
                        double rightItemRating = right.Item2;

                        Debug.Assert(rightItemRating != 0 && leftItemRating != 0);

                        // Skip the diagonal
                        if (leftItemIndex == rightItemIndex) { continue; }

                        userPreferences[leftItemIndex, rightItemIndex] = 0.1 * (leftItemRating - rightItemRating + 5);//(double)leftItemRating / (leftItemRating + rightItemRating);
                    }
                }

                // Because pr's upper triangular should be a mirror of the lower triangular
                Debug.Assert((userPreferences.NonZerosCount).IsEven());
                double debug1 = (Math.Pow(((SparseVector)R.GetRow(userIndex)).NonZerosCount, 2)
                    - ((SparseVector)R.GetRow(userIndex)).NonZerosCount);
                double debug2 = userPreferences.NonZerosCount;
                Debug.Assert(debug1 == debug2);

                lock (lockMe)
                {
                    // Copy similarity values from lower triangular to upper triangular
                    //pr_uid = DenseMatrix.OfMatrix(pr_uid + pr_uid.Transpose() - DenseMatrix.CreateIdentity(pr_uid.RowCount));
                    PR[userIndex] = userPreferences;
                }
            });



            return PR;
        }
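The scalar preference formula 0.1 * (leftItemRating - rightItemRating + 5) assumes a 1-5 rating scale: the rating difference lies in [-4, 4], so preferences land in [0.1, 0.9], with 0.5 meaning the two items are rated equally. For example, ratings 5 and 2 give 0.1 * (5 - 2 + 5) = 0.8 for the (left, right) cell and 0.1 * (2 - 5 + 5) = 0.2 for the mirrored (right, left) cell, which is why the two triangulars mirror each other around 0.5.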
Example #7
        public string GetReadyForOrdinal(bool saveLoadedData = true)
        {
            if (!ReadyForNumerical) { GetReadyForNumerical(); }
            if (ReadyForOrdinal) { return "Is ready."; }

            StringBuilder log = new StringBuilder();
            Utils.StartTimer();
            log.AppendLine(Utils.PrintHeading("Prepare preferecen relation data"));

            Console.WriteLine("Converting R_train into PR_train");
            log.AppendLine("Converting R_train into PR_train");
            PR_train = PrefRelations.CreateDiscrete(R_train);

            //Console.WriteLine("Converting R_test into PR_test");
            //log.AppendLine("Converting R_test into PR_test");
            //PR_test = PrefRelations.CreateDiscrete(R_test);

            log.AppendLine(Utils.StopTimer());

            #region Prepare similarity data
            if (File.Exists(GetDataFileName("USP"))
                && File.Exists(GetDataFileName("ISP"))
                && File.Exists(GetDataFileName("SSIIP")))
            {

                Utils.StartTimer();
                Utils.PrintHeading("Load user, item, indicators variables (Pref based)");
                UserSimilaritiesOfPref = Utils.IO<SimilarityData>.LoadObject(GetDataFileName("USP"));
                ItemSimilaritiesOfPref = Utils.IO<SimilarityData>.LoadObject(GetDataFileName("ISP"));
                StrongSimilarityIndicatorsByItemPref = Utils.IO<HashSet<Tuple<int, int>>>.LoadObject(GetDataFileName("SSIIP"));
                Utils.StopTimer();
            }
            else
            {
                Utils.StartTimer();
                Utils.PrintHeading("Compute user-user similarities (Pref based)");
                Metric.GetCosineOfPrefRelations(PR_train, MaxCountOfNeighbors, 
                    StrongSimilarityThreshold, out UserSimilaritiesOfPref);
                Utils.StopTimer();

                // For the moment, we use user-wise preferences to compute
                // item-item similarities; this is not the same as user-user pref similarities
                Utils.StartTimer();
                Utils.PrintHeading("Compute item-item similarities (Pref based)");
                DataMatrix PR_userwise_preferences = new DataMatrix(PR_train.GetPositionMatrix());
                Metric.GetPearsonOfColumns(PR_userwise_preferences, MaxCountOfNeighbors, StrongSimilarityThreshold,
                    out ItemSimilaritiesOfPref, out StrongSimilarityIndicatorsByItemPref);
                Utils.StopTimer();

                if (saveLoadedData)
                {
                    Utils.IO<SimilarityData>.SaveObject(UserSimilaritiesOfPref, GetDataFileName("USP"));
                    Utils.IO<SimilarityData>.SaveObject(ItemSimilaritiesOfPref, GetDataFileName("ISP"));
                    Utils.IO<HashSet<Tuple<int,int>>>
                        .SaveObject(StrongSimilarityIndicatorsByItemPref, GetDataFileName("SSIIP"));
                }
                Utils.StopTimer();

            }
            #endregion

            

            ReadyForOrdinal = true;

            return log.ToString();
        }
Example #8
        // TODO: Scalar preference relations based on Bradley-Terry model
        public static PrefRelations CreateScalar(DataMatrix R)
        {
            int           userCount = R.UserCount;
            int           itemCount = R.ItemCount;
            PrefRelations PR        = new PrefRelations(itemCount);

            // Create a preference matrix for each user
            Object lockMe = new Object();

            Parallel.ForEach(R.Users, user =>
            {
                int userIndex            = user.Item1;
                RatingVector userRatings = new RatingVector(user.Item2);

                Utils.PrintEpoch("Doing user/total", userIndex, userCount);

                // The diagonal refers to the i-i item pair
                SparseMatrix userPreferences = new SparseMatrix(itemCount);

                // The diagonal is left empty!
                //SparseMatrix.OfMatrix(Matrix.Build.SparseDiagonal(itemCount, Config.Preferences.EquallyPreferred));

                // TODO: Use Vector.Map2 to replace the following two foreach loops

                // Here we need to compare each pair of items rated by this user
                foreach (Tuple <int, double> left in userRatings.Ratings)
                {
                    int leftItemIndex     = left.Item1;
                    double leftItemRating = left.Item2;

                    foreach (Tuple <int, double> right in userRatings.Ratings)
                    {
                        int rightItemIndex = right.Item1;

                        // TODO: We could compute only the lower triangular,
                        // and the upper will be a negative mirror.
                        // Let's do it directly at this stage.
                        double rightItemRating = right.Item2;

                        Debug.Assert(rightItemRating != 0 && leftItemRating != 0);

                        // Skip the diagonal
                        if (leftItemIndex == rightItemIndex)
                        {
                            continue;
                        }

                        userPreferences[leftItemIndex, rightItemIndex] = 0.1 * (leftItemRating - rightItemRating + 5);//(double)leftItemRating / (leftItemRating + rightItemRating);
                    }
                }

                // Because pr's upper triangular should be a mirror of the lower triangular
                Debug.Assert((userPreferences.NonZerosCount).IsEven());
                double debug1 = (Math.Pow(((SparseVector)R.GetRow(userIndex)).NonZerosCount, 2)
                                 - ((SparseVector)R.GetRow(userIndex)).NonZerosCount);
                double debug2 = userPreferences.NonZerosCount;
                Debug.Assert(debug1 == debug2);

                lock (lockMe)
                {
                    // Copy similarity values from lower triangular to upper triangular
                    //pr_uid = DenseMatrix.OfMatrix(pr_uid + pr_uid.Transpose() - DenseMatrix.CreateIdentity(pr_uid.RowCount));
                    PR[userIndex] = userPreferences;
                }
            });



            return(PR);
        }
Example #9
        public static DataMatrix PredictRatings(PrefRelations PR_train,
            DataMatrix R_unknown, int K, SimilarityData neighborsByUser)
        {
            Debug.Assert(PR_train.UserCount == R_unknown.UserCount);
            Debug.Assert(PR_train.ItemCount == R_unknown.ItemCount);

            // This matrix stores predictions
            DataMatrix R_predicted = new DataMatrix(R_unknown.UserCount, R_unknown.ItemCount);

            // This can be considered as the R_train in standard UserKNN
            SparseMatrix positionMatrix = PR_train.GetPositionMatrix();
            DataMatrix ratingMatrixFromPositions = new DataMatrix(positionMatrix);

            Vector<double> meanByUser = ratingMatrixFromPositions.GetUserMeans();
            Vector<double> meanByItem = ratingMatrixFromPositions.GetItemMeans();
            double globalMean = ratingMatrixFromPositions.GetGlobalMean();

            // Predict positions for each test user
            // Appears to be very fast; Parallel.ForEach is unnecessary
            foreach (Tuple<int, Vector<double>> user in R_unknown.Users)
            {
                int indexOfUser = user.Item1;
                Vector<double> indexesOfUnknownRatings = user.Item2;

                Utils.PrintEpoch("Predicting user/total", indexOfUser, PR_train.UserCount);

                // Note that there are more than K neighbors in the list (sorted by similarity);
                // we will use the top-K neighbors WHO HAVE RATED THE ITEM.
                // For example, we may keep the top 200 neighbors and hope that at least
                // K of them have rated the item. We can't keep everyone in the
                // neighbor list because there would be too many for large data sets.
                var topNeighborsOfUser = neighborsByUser[indexOfUser];

                double meanOfUser = meanByUser[indexOfUser];

                // Loop through each position to be predicted
                foreach (Tuple<int, double> unknownRating in indexesOfUnknownRatings.EnumerateIndexed(Zeros.AllowSkip))
                {
                    int indexOfUnknownItem = unknownRating.Item1;

                    // Compute the position of this item for the user
                    // by combining neighbors' positions on this item
                    double weightedSum = 0;
                    double weightSum = 0;
                    int currentTopKCount = 0;
                    foreach (KeyValuePair<int, double> neighbor in topNeighborsOfUser)
                    {
                        int indexOfNeighbor = neighbor.Key;
                        double similarityOfNeighbor = neighbor.Value;
                        double itemPositionOfNeighbor = ratingMatrixFromPositions[indexOfNeighbor, indexOfUnknownItem];

                        // We count only if the neighbor has seen this item before
                        if (itemPositionOfNeighbor != 0)
                        {
                            // Recall that we use a constant to hold position value 0
                            // we revert it back here
                            if (itemPositionOfNeighbor == Config.ZeroInSparseMatrix)
                            {
                                Debug.Assert(true, "By using the PositionShift constant, we should not be in here.");
                                itemPositionOfNeighbor = 0;
                            }
                            weightSum += similarityOfNeighbor;
                            weightedSum += (itemPositionOfNeighbor - meanByUser[indexOfNeighbor]) * similarityOfNeighbor;
                            currentTopKCount++;
                            if (currentTopKCount >= K)
                            {
                                break;
                            }
                        }
                    }

                    // If any neighbor has seen this item
                    if (currentTopKCount != 0)
                    {
                        // TODO: Adding the user mean may improve the performance
                        R_predicted[indexOfUser, indexOfUnknownItem] = meanOfUser + weightedSum / weightSum;
                    }
                    else
                    {
                        R_predicted[indexOfUser, indexOfUnknownItem] = globalMean;
                    }
                }
            }//);
            return R_predicted;
        }
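In effect, this is mean-centered user-based KNN over positions. Writing N(u, i) for the top-K neighbors of user u who have seen item i, the prediction computed above is

        r_hat(u, i) = mean(u) + sum_{v in N(u, i)} sim(u, v) * (pos(v, i) - mean(v))
                              / sum_{v in N(u, i)} sim(u, v)

with the global mean used as a fallback when N(u, i) is empty.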
Example #10
        public static Dictionary<int, List<int>> RecommendTopN(PrefRelations PR_train, int K, List<int> targetUsers, int topN)
        {
            Dictionary<int, List<int>> topNItemsByUser = new Dictionary<int, List<int>>(targetUsers.Count);

            int userCount = PR_train.UserCount;
            int itemCount = PR_train.ItemCount;
            SparseMatrix positionMatrix = PR_train.GetPositionMatrix();

            // Make recommendations to each target user
            foreach (int indexOfUser in targetUsers)
            {
                Utils.PrintEpoch("Current user/total", indexOfUser, targetUsers.Count);

                // TODO: should have a default list of popular items in case of cold users
                Dictionary<int, double> topNItems = new Dictionary<int, double>(topN);   // To store recommendations for indexOfUser
                Dictionary<int, double> topKNeighbors = KNNCore.GetTopKNeighborsByUser(PR_train.UserSimilarities, indexOfUser, K);
                SparseVector predictedPositionsOfUser = new SparseVector(itemCount);

                // Compute the predicted position of each item for indexOfUser
                for (int indexOfItem = 0; indexOfItem < itemCount; ++indexOfItem)
                {
                    // Compute the position of this item for the user
                    // by combining neighbors' positions on this item
                    double weightedSum = 0;
                    double weightSum = 0;
                    int itemSeenCount = 0;
                    foreach (KeyValuePair<int, double> neighbor in topKNeighbors)
                    {
                        int indexOfNeighbor = neighbor.Key;
                        double similarityOfNeighbor = neighbor.Value;
                        double itemPositionOfNeighbor = positionMatrix[indexOfNeighbor, indexOfItem];

                        // TODO: Zero means it is not seen by the neighbor but 
                        // it may also be the position value of 0
                        if (itemPositionOfNeighbor != 0)
                        {
                            weightSum += similarityOfNeighbor;
                            weightedSum += itemPositionOfNeighbor * similarityOfNeighbor;
                            itemSeenCount++;
                        }
                    }

                    // If any neighbor has seen this item
                    if (itemSeenCount != 0)
                    {
                        // TODO: Adding the user mean may improve the performance
                        predictedPositionsOfUser[indexOfItem] = weightedSum / weightSum;
                    }
                }
                List<int> indexesOfItemSortedByPosition = Enumerable.Range(0, itemCount).ToList();

                Sorting.Sort(predictedPositionsOfUser, indexesOfItemSortedByPosition);
                indexesOfItemSortedByPosition.Reverse(); // Make it descending order by position
                // Add the top N items for user uid
                topNItemsByUser[indexOfUser] = indexesOfItemSortedByPosition.GetRange(0, topN);
            }

            return topNItemsByUser;
            #region Old version
            /*
            //===============Initialize variables==================

            // Recommendations are stored here indexed by user id
            Dictionary<int, List<int>> userRecommendations = new Dictionary<int, List<int>>(targetUsers.Count);

            int userCount = PR_train.UserCount;
            int itemCount = PR_train.ItemCount;

            // Build the item position matrix
            // each element indicates the position(kind of goodness) of an item to the user
            SparseMatrix itemPositions = new SparseMatrix(userCount, itemCount);

            Object lockMe = new Object();
            Parallel.ForEach(PR_train.GetAllPreferenceRelations, pair =>
            {
                int uid = pair.Key;
                Utilities.PrintEpoch("Current user/total", uid, userCount);
                SparseMatrix userPreferences = pair.Value;
                foreach (Tuple<int, Vector<double>> preferences in userPreferences.EnumerateRowsIndexed())
                {
                    int iid = preferences.Item1;
                    SparseVector iidPreferences = SparseVector.OfVector(preferences.Item2);
                    // The number of items that are preferred to item iid
                    int preferredCount = 0;
                    // The number of items that are less preferred to item iid
                    int lessPreferredCount = 0;
                    // The number of items (other than item iid) that are equally preferred to item iid
                    // TODO: I'm not sure if we should count unknown preferences or not?
                    int equallyPreferredCount = 0;

                    // Note: don't use the Count() method; it won't skip Zeros
                    foreach (double preference in iidPreferences.Enumerate(Zeros.AllowSkip))
                    {
                        if (preference == Config.Preferences.Preferred)
                            ++preferredCount;
                        else if (preference == Config.Preferences.LessPreferred)
                            ++lessPreferredCount;
                        else if (preference == Config.Preferences.EquallyPreferred)
                            ++equallyPreferredCount;
                        else { Debug.Assert(false, "We should not see any non-match value here."); }
                    }

                    double position = ((double)lessPreferredCount - preferredCount) / (preferredCount + lessPreferredCount + equallyPreferredCount);

                    Debug.Assert(position >= -1 && position <= 1);  // According to the paper
                    if (position == 0) { Debug.Assert(preferredCount == lessPreferredCount); }  // According to the paper

                    lock (lockMe)
                    {
                        itemPositions[uid, iid] = position;
                    }
                }
            });

            // Need to cache the items appeared in each user's profile
            // as we won't consider unseen items as recommendations
            Dictionary<int, List<int>> seenItemsByUser = PR_train.GetSeenItemsByUser();

            Matrix positionMatrix = PR_train.GetPositionMatrix();

            Console.WriteLine("Recommending user/total");

            // Make recommendations for each target user
            foreach (int uid in targetUsers)
            {

                Utilities.PrintEpoch("Current user/total", uid, targetUsers.Count);

                // TODO: should have a default list of popular items in case of cold users
                Dictionary<int, double> topN = new Dictionary<int, double>(topNCount);   // To store recommendations for user uid

                Dictionary<int, double> topK = KNNCore.GetTopK(PR_train.UserSimilarities, uid, K);

                // Get a list of all candidate items
                List<int> candidateItems = new List<int>();
                foreach (int uid_neighbor in topK.Keys)
                {
                    // TODO: union will remove duplicates, seems to be expensive here
                    candidateItems = candidateItems.Union(seenItemsByUser[uid_neighbor]).ToList();
                }

                // Loop through all candidate items
                double minPosition = double.MinValue;
                int min_iid = int.MinValue;
                foreach (int iid in candidateItems)
                {
                    // Compute the average position on item iid given 
                    // by the top K neighbors. Each position is weighted 
                    // by the similarity to the target user
                    double weightedSum = 0;
                    double weightSum = 0;
                    foreach (KeyValuePair<int, double> neighbor in topK)
                    {
                        int uidNeighbor = neighbor.Key;
                        double similarity = neighbor.Value;
                        double iidPosition = itemPositions[uidNeighbor, iid];
                        // TODO: check the standard KNN, we should skip the unseen items somehow!
                        //if (neighborRating != 0)
                        // The weightSum serves as the normalization term
                        // it needs abs() because some metric such as Pearson 
                        // may produce negative weights
                        weightSum += Math.Abs(similarity);
                        weightedSum += iidPosition * similarity;
                    }

                    double position_predicted = weightedSum / weightSum;  // TODO: add some kind of user mean to improve?

                    // TODO: should have a default list of popular items in case of cold users

                    if (topN.Count < topNCount)  // Fill the top N list until it is full
                    {
                        topN[iid] = position_predicted;
                        if (topN.Count == topNCount)
                        {
                            // Find the item with least position when we have N items in the list
                            min_iid = topN.Aggregate((l, r) => l.Value < r.Value ? l : r).Key;
                            minPosition = topN[min_iid];
                        }
                    }
                    else if (position_predicted > minPosition)
                    {
                        // Replace the least similar neighbor
                        topN.Remove(min_iid);
                        topN[iid] = position_predicted;

                        // Find the item with least position
                        min_iid = topN.Aggregate((l, r) => l.Value < r.Value ? l : r).Key;
                        minPosition = topN[min_iid];
                    }
                }
                // Add the top N items for user uid
                userRecommendations[uid] = topN.Keys.ToList();
            }

            return userRecommendations;
            */
            #endregion
        }
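For reference, the commented-out old version defines an item's position for a user as

        position = (lessPreferredCount - preferredCount) / (preferredCount + lessPreferredCount + equallyPreferredCount)

which lies in [-1, 1] as the Debug.Assert checks. For example, an item that wins against 3 items (they are less preferred), loses against 1, and ties with none gets (3 - 1) / 4 = 0.5.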
Example #11
        public static DataMatrix PredictRatings(PrefRelations PR_train,
                                                DataMatrix R_unknown, int K, SimilarityData neighborsByUser)
        {
            Debug.Assert(PR_train.UserCount == R_unknown.UserCount);
            Debug.Assert(PR_train.ItemCount == R_unknown.ItemCount);

            // This matrix stores predictions
            DataMatrix R_predicted = new DataMatrix(R_unknown.UserCount, R_unknown.ItemCount);

            // This can be considered as the R_train in standard UserKNN
            SparseMatrix positionMatrix            = PR_train.GetPositionMatrix();
            DataMatrix   ratingMatrixFromPositions = new DataMatrix(positionMatrix);

            Vector <double> meanByUser = ratingMatrixFromPositions.GetUserMeans();
            Vector <double> meanByItem = ratingMatrixFromPositions.GetItemMeans();
            double          globalMean = ratingMatrixFromPositions.GetGlobalMean();

            // Predict positions for each test user
            // Appears to be very fast; Parallel.ForEach is unnecessary
            foreach (Tuple <int, Vector <double> > user in R_unknown.Users)
            {
                int             indexOfUser             = user.Item1;
                Vector <double> indexesOfUnknownRatings = user.Item2;

                Utils.PrintEpoch("Predicting user/total", indexOfUser, PR_train.UserCount);

                // Note that there are more than K neighbors in the list (sorted by similarity);
                // we will use the top-K neighbors WHO HAVE RATED THE ITEM.
                // For example, we may keep the top 200 neighbors and hope that at least
                // K of them have rated the item. We can't keep everyone in the
                // neighbor list because there would be too many for large data sets.
                var topNeighborsOfUser = neighborsByUser[indexOfUser];

                double meanOfUser = meanByUser[indexOfUser];

                // Loop through each position to be predicted
                foreach (Tuple <int, double> unknownRating in indexesOfUnknownRatings.EnumerateIndexed(Zeros.AllowSkip))
                {
                    int indexOfUnknownItem = unknownRating.Item1;

                    // Compute the position of this item for the user
                    // by combining neighbors' positions on this item
                    double weightedSum      = 0;
                    double weightSum        = 0;
                    int    currentTopKCount = 0;
                    foreach (KeyValuePair <int, double> neighbor in topNeighborsOfUser)
                    {
                        int    indexOfNeighbor        = neighbor.Key;
                        double similarityOfNeighbor   = neighbor.Value;
                        double itemPositionOfNeighbor = ratingMatrixFromPositions[indexOfNeighbor, indexOfUnknownItem];

                        // We count only if the neighbor has seen this item before
                        if (itemPositionOfNeighbor != 0)
                        {
                            // Recall that we use a constant to hold position value 0
                            // we revert it back here
                            if (itemPositionOfNeighbor == Config.ZeroInSparseMatrix)
                            {
                                Debug.Assert(true, "By using the PositionShift constant, we should not be in here.");
                                itemPositionOfNeighbor = 0;
                            }
                            weightSum   += similarityOfNeighbor;
                            weightedSum += (itemPositionOfNeighbor - meanByUser[indexOfNeighbor]) * similarityOfNeighbor;
                            currentTopKCount++;
                            if (currentTopKCount >= K)
                            {
                                break;
                            }
                        }
                    }

                    // If any neighbor has seen this item
                    if (currentTopKCount != 0)
                    {
                        // TODO: Adding the user mean may improve the performance
                        R_predicted[indexOfUser, indexOfUnknownItem] = meanOfUser + weightedSum / weightSum;
                    }
                    else
                    {
                        R_predicted[indexOfUser, indexOfUnknownItem] = globalMean;
                    }
                }
            }//);
            return(R_predicted);
        }
Example #12
        private static void LearnLatentFeatures(PrefRelations PR_train, int maxEpoch,
                                                double learnRate, double regularizationOfUser, double regularizationOfItem,
                                                int factorCount, out List <Vector <double> > P, out List <Vector <double> > Q)
        {
            //regularizationOfUser = 0;
            //regularizationOfItem = 0;
            int userCount = PR_train.UserCount;
            int itemCount = PR_train.ItemCount;

            // User latent vectors with default seed
            P = new List <Vector <double> >();
            Q = new List <Vector <double> >();
            ContinuousUniform uniformDistribution = new ContinuousUniform(0, 0.1, new Random(Config.Seed));

            //var p = Utils.CreateRandomMatrixFromUniform(userCount, factorCount, 0, 0.1, Config.Seed);
            for (int i = 0; i < userCount; i++)
            {
                P.Add(DenseVector.CreateRandom(factorCount, uniformDistribution));
            }
            for (int i = 0; i < itemCount; i++)
            {
                Q.Add(DenseVector.CreateRandom(factorCount, uniformDistribution));
            }
            //   P = Utils.CreateRandomMatrixFromUniform(userCount, factorCount, 0, 0.1, Config.Seed);
            // Item latent vectors with a different seed
            //Q = Utils.CreateRandomMatrixFromUniform(factorCount, itemCount, 0, 0.1, Config.Seed + 1);

            // SGD
            double previousErrorSum = long.MaxValue;

            for (int epoch = 0; epoch < maxEpoch; ++epoch)
            {
                // For each epoch, we will iterate through all
                // preference relations of all users

                // Loop through each user
                foreach (var pair in PR_train.PreferenceRelationsByUser)
                {
                    int          indexOfUser = pair.Key;
                    SparseMatrix preferenceRelationsOfUser = pair.Value;

                    // For each preference relation of this user, update the latent feature vectors
                    foreach (var entry in preferenceRelationsOfUser.EnumerateIndexed(Zeros.AllowSkip))
                    {
                        int indexOfItem_i = entry.Item1;
                        int indexOfItem_j = entry.Item2;
                        //Console.WriteLine(preferenceRelationsOfUser[indexOfItem_i, indexOfItem_j]);
                        //Console.WriteLine(preferenceRelationsOfUser[indexOfItem_j, indexOfItem_i]);
                        if (indexOfItem_i >= indexOfItem_j)
                        {
                            continue;
                        }

                        // Warning: here we need to convert the customized preference indicators
                        // from 1,2,3 into 0,0.5,1 to match the scale of the predicted pi, which is in range [0,1]
                        double prefRelation_uij = 0;
                        if (entry.Item3 == Config.Preferences.Preferred)
                        {
                            prefRelation_uij = 1.0;
                        }
                        else if (entry.Item3 == Config.Preferences.EquallyPreferred)
                        {
                            prefRelation_uij = 0.5;
                        }
                        else if (entry.Item3 == Config.Preferences.LessPreferred)
                        {
                            prefRelation_uij = 0.0;
                        }
                        else
                        {
                            Debug.Assert(true, "Should not be here.");
                        }

                        // TODO: Maybe it can be faster to do two dot products to remove the subtraction (would lose the sparse property, I think)
                        double PQ_ui        = P[indexOfUser].DotProduct(Q[indexOfItem_i]);
                        double PQ_uj        = P[indexOfUser].DotProduct(Q[indexOfItem_j]);
                        double estimate_uij = PQ_ui - PQ_uj;
                        //double estimate_uij = P.Row(indexOfUser).DotProduct(Q.Column(indexOfItem_i) - Q.Column(indexOfItem_j));   // Eq. 2


                        double exp_estimate_uij        = Math.Exp(estimate_uij);                      // numerator in Eq. 2
                        double normalized_estimate_uij = SpecialFunctions.InverseLogit(estimate_uij); // pi_uij in paper

                        //Debug.Assert(prefRelation_uij >= 0 && prefRelation_uij <= 1);
                        //Debug.Assert(normalized_estimate_uij >= 0 && normalized_estimate_uij <= 1);


                        // The error term in Eq. 6-9. Note that the author's paper incorrectly puts a power on the error
                        double e_uij = prefRelation_uij - normalized_estimate_uij;
                        //double e_uij = Math.Pow(prefRelation_uij - normalized_estimate_uij, 2) ;  // from Eq. 3&6
                        double e_uij_derivative = (e_uij * normalized_estimate_uij) / (1 + exp_estimate_uij);

                        // Update feature vectors
                        Vector <double> P_u  = P[indexOfUser];
                        Vector <double> Q_i  = Q[indexOfItem_i];
                        Vector <double> Q_j  = Q[indexOfItem_j];
                        Vector <double> Q_ij = Q_i - Q_j;

                        P[indexOfUser] += Q_ij.Multiply(e_uij_derivative * learnRate) - P_u.Multiply(regularizationOfUser * learnRate);

                        // Eq. 7, note that the author's paper incorrectly writes + regularization
                        //Vector<double> P_u_updated = P_u + (Q_ij.Multiply(e_uij_derivative) - P_u.Multiply(regularizationOfUser)).Multiply(learnRate);
                        //P[indexOfUser] = P_u_updated;
                        Vector <double> P_u_derivative = P_u.Multiply(e_uij_derivative * learnRate);
                        // Eq. 8, note that the author's paper incorrectly writes + regularization
                        //Vector<double> Q_i_updated = Q_i + (P_u_derivative - Q_i.Multiply(regularizationOfItem * learnRate));
                        //Q[indexOfItem_i] = Q_i_updated;

                        Q[indexOfItem_i] += (P_u_derivative - Q_i.Multiply(regularizationOfItem * learnRate));

                        // Eq. 9, note that the author's paper incorrectly writes + regularization
                        //Vector<double> Q_j_updated = Q_j - (P_u_derivative - Q_j.Multiply(regularizationOfItem * learnRate));
                        //Q[indexOfItem_j] =Q_j_updated;
                        Q[indexOfItem_j] -= (P_u_derivative - Q_j.Multiply(regularizationOfItem * learnRate));

                        double estimate_uij_updated            = P[indexOfUser].DotProduct(Q[indexOfItem_i] - Q[indexOfItem_j]); // Eq. 2
                        double exp_estimate_uij_updated        = Math.Exp(estimate_uij_updated);                                 // numerator in Eq. 2
                        double normalized_estimate_uij_updated = SpecialFunctions.InverseLogit(estimate_uij_updated);            // pi_uij in paper
                        //double e_uij_updated = Math.Pow(prefRelation_uij - normalized_estimate_uij_updated, 2);  // from Eq. 3&6
                        double e_uij_updated = prefRelation_uij - normalized_estimate_uij_updated;                               // from Eq. 3&6

                        //double debug1 = Math.Abs(e_uij) - Math.Abs(e_uij_updated);
                        // Debug.Assert(debug1 > 0);    // After update the error should be smaller

                        #region Loop version of gradient update

                        /*
                         * for (int k = 0; k < factorCount; ++k)
                         * {
                         *  double factorOfUser = P[indexOfUser, k];
                         *  double factorOfItem_i = Q[k, indexOfItem_i];
                         *  double factorOfItem_j = Q[k, indexOfItem_j];
                         *
                         *  // TODO: Seperate user/item regularization coefficient
                         *  P[indexOfUser, k] += learnRate * (e_uij * normalized_estimate_uij * factorOfUser - regularization * factorOfUser);
                         *  // Two items are updated in different directions
                         *  Q[k, indexOfItem_i] += learnRate * (normalized_estimate_uij * factorOfItem_i - regularization * factorOfItem_i);
                         *  // Two items are updated in different directions
                         *  Q[k, indexOfItem_j] -= learnRate * (normalized_estimate_uij * factorOfItem_j - regularization * factorOfItem_j);
                         * }
                         */
                        #endregion
                    }
                }

                // Display the current regularized error see if it converges
                double currentErrorSum = 0;
                //if (epoch == 0 || epoch == maxEpoch - 1 || epoch % (int)Math.Ceiling(maxEpoch * 0.1) == 4)
                if (true)
                {
                    double eSum = 0;
                    foreach (var pair in PR_train.PreferenceRelationsByUser)
                    {
                        int          indexOfUser = pair.Key;
                        SparseMatrix preferenceRelationsOfUser = pair.Value;

                        // For each preference relation of this user, update the latent feature vectors
                        foreach (var entry in preferenceRelationsOfUser.EnumerateIndexed(Zeros.AllowSkip))
                        {
                            int indexOfItem_i = entry.Item1;
                            int indexOfItem_j = entry.Item2;

                            if (indexOfItem_i >= indexOfItem_j)
                            {
                                continue;
                            }

                            double prefRelation_uij = 0;
                            if (entry.Item3 == Config.Preferences.Preferred)
                            {
                                prefRelation_uij = 1.0;
                            }
                            else if (entry.Item3 == Config.Preferences.EquallyPreferred)
                            {
                                prefRelation_uij = 0.5;
                            }
                            else if (entry.Item3 == Config.Preferences.LessPreferred)
                            {
                                prefRelation_uij = 0.0;
                            }
                            else
                            {
                                Debug.Assert(true, "Should not be here.");
                            }

                            // TODO: Maybe it can be faster to do two dot products to remove the subtraction (would lose the sparse property, I think)
                            double estimate_uij            = P[indexOfUser].DotProduct(Q[indexOfItem_i] - Q[indexOfItem_j]); // Eq. 2
                            double normalized_estimate_uij = SpecialFunctions.InverseLogit(estimate_uij);                    // Eq. 2
                            eSum += Math.Pow((prefRelation_uij - normalized_estimate_uij), 2);                               // Sum the error of this preference relation

                            // Sum the regularization term
                            //for (int k = 0; k < factorCount; ++k)
                            // {
                            //     eSum += (regularizationOfUser * 0.5) * (Math.Pow(P[indexOfUser, k], 2)
                            //         + Math.Pow(Q[k, indexOfItem_i], 2) + Math.Pow(Q[k, indexOfItem_j], 2));
                            // }
                        }
                    }
                    double regularizationPenalty = regularizationOfUser * P.Sum(x => x.SquaredSum());
                    regularizationPenalty += regularizationOfItem * Q.Sum(x => x.SquaredSum());
                    eSum += regularizationPenalty;

                    // Record the current error
                    currentErrorSum = eSum;

                    Utils.PrintEpoch("Epoch", epoch, maxEpoch, "Learning error", eSum.ToString("0.0"), true);
                    // Stop the learning if the improvement in regularized error falls below a certain threshold
                    // Actually we only check it once every few epochs
                    if (previousErrorSum - currentErrorSum < 0.0001)
                    {
                        Console.WriteLine("Improvment less than 0.0001, learning stopped.");
                        break;
                    }
                    previousErrorSum = currentErrorSum;
                }
            }
        }
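A note on the gradient factor used above: with x = estimate_uij and pi = InverseLogit(x) = 1 / (1 + exp(-x)), the identity pi / (1 + exp(x)) = pi * (1 - pi) holds, and pi * (1 - pi) is exactly the derivative of the logistic function. So e_uij_derivative = e_uij * pi / (1 + exp_estimate_uij) equals the error scaled by the logistic derivative, i.e. the negative gradient of the squared error 0.5 * e_uij^2 with respect to the raw score, which is the descent direction the updates follow.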
Example #13
        // We need to directly compute the position matrix because the PR would be too big to fit into memory
        public static SparseMatrix PredictPrefRelations(PrefRelations PR_train, Dictionary<int, List<int>> PR_unknown,
    int maxEpoch, double learnRate, double regularizationOfUser, double regularizationOfItem, int factorCount, List<double> quantizer)
        {
            // Latent features
            List<Vector<double>> P;
            List<Vector<double>> Q;
            //Matrix<double> P;
            //Matrix<double> Q;


            //SparseMatrix positionMatrix = new SparseMatrix(PR_train.UserCount, PR_train.ItemCount);
            Vector<double>[] positionMatrixCache = new Vector<double>[PR_train.UserCount];
            LearnLatentFeatures(PR_train, maxEpoch, learnRate, regularizationOfUser, regularizationOfItem, factorCount, out P, out Q);

            //PrefRelations PR_predicted = new PrefRelations(PR_train.ItemCount);

            Object lockMe = new Object();
            Parallel.ForEach(PR_unknown, user =>
            {
                Utils.PrintEpoch("Epoch", user.Key, PR_unknown.Count);
                int indexOfUser = user.Key;
                List<int> unknownItemsOfUser = user.Value;
                //SparseMatrix predictedPreferencesOfUser = new SparseMatrix(PR_train.ItemCount, PR_train.ItemCount);
                List<Tuple<int, int, double>> predictedPreferencesOfUserCache = new List<Tuple<int, int, double>>();

                // Predict each unknown preference
                foreach (int indexOfItem_i in unknownItemsOfUser)
                {
                    foreach (int indexOfItem_j in unknownItemsOfUser)
                    {
                        if (indexOfItem_i == indexOfItem_j) continue;
                        double estimate_uij = P[indexOfUser].DotProduct(Q[indexOfItem_i] - Q[indexOfItem_j]);   // Eq. 2
                        double normalized_estimate_uij = Core.SpecialFunctions.InverseLogit(estimate_uij);   // pi_uij in paper
                        predictedPreferencesOfUserCache.Add(new Tuple<int, int, double>(indexOfItem_i, indexOfItem_j, normalized_estimate_uij));
                        //predictedPreferencesOfUser[indexOfItem_i, indexOfItem_j] = normalized_estimate_uij;
                    }
                }

                // Note: performance is better when we do not quantize here
                /*
                DataMatrix predictedPreferencesOfUser = 
                    new DataMatrix(SparseMatrix.OfIndexed(PR_train.ItemCount, PR_train.ItemCount, predictedPreferencesOfUserCache));
                predictedPreferencesOfUser.Quantization(0, 1.0, quantizer);    
                Vector<double> positionsOfUser = PrefRelations.PreferencesToPositions(predictedPreferencesOfUser.Matrix);
                */
                
                double[] positionByItem = new double[PR_train.ItemCount];
                foreach(var triplet in predictedPreferencesOfUserCache)
                {
                    int indexOfItem_i = triplet.Item1;
                    int indexOfItem_j = triplet.Item2;
                    double preference = triplet.Item3;
                    if (preference > 0.5)
                    {
                        positionByItem[indexOfItem_i]++;
                        positionByItem[indexOfItem_j]--;
                    }
                    else if (preference < 0.5)
                    {
                        positionByItem[indexOfItem_i]--;
                        positionByItem[indexOfItem_j]++;
                    }
                }

                int normalizationTerm = unknownItemsOfUser.Count * 2 - 2;
                for (int i = 0; i < positionByItem.Length; i++)
                {
                    if (positionByItem[i] != 0)
                        positionByItem[i] /= normalizationTerm;
                }
                
                Vector<double> positionsOfUser = SparseVector.OfEnumerable(positionByItem);
                
                lock (lockMe)
                {
                    positionMatrixCache[indexOfUser] = positionsOfUser;
                    //positionMatrix.SetRow(indexOfUser, positionsOfUser);
                    //PR_predicted[indexOfUser] = predictedPreferencesOfUser;
                }
            });

            return SparseMatrix.OfRowVectors(positionMatrixCache);
        }
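The hand-rolled position computation above matches the position definition from Example #10's old version: with n = unknownItemsOfUser.Count, each item is compared against the other n - 1 items in both orderings, so its raw win/loss tally lies in [-(2n - 2), 2n - 2]; dividing by normalizationTerm = 2 * n - 2 maps positions into [-1, 1].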
Example #14
        private static void LearnLatentFeatures(PrefRelations PR_train, int maxEpoch, 
            double learnRate, double regularizationOfUser, double regularizationOfItem,
            int factorCount, out List<Vector<double>> P, out List<Vector<double>> Q)
        {
            //regularizationOfUser = 0;
            //regularizationOfItem = 0;
            int userCount = PR_train.UserCount;
            int itemCount = PR_train.ItemCount;

            // User latent vectors with default seed
            P = new List<Vector<double>>();
            Q = new List<Vector<double>>();
            ContinuousUniform uniformDistribution = new ContinuousUniform(0, 0.1, new Random(Config.Seed));
            //var p = Utils.CreateRandomMatrixFromUniform(userCount, factorCount, 0, 0.1, Config.Seed);
            for (int i = 0; i < userCount; i++ )
            {
                P.Add(DenseVector.CreateRandom(factorCount,uniformDistribution));
            }
            for (int i = 0; i < itemCount; i++)
            {
                Q.Add(DenseVector.CreateRandom(factorCount, uniformDistribution));
            }
             //   P = Utils.CreateRandomMatrixFromUniform(userCount, factorCount, 0, 0.1, Config.Seed);
            // Item latent vectors with a different seed
            //Q = Utils.CreateRandomMatrixFromUniform(factorCount, itemCount, 0, 0.1, Config.Seed + 1);

            // SGD
            double previousErrorSum = long.MaxValue;
            for (int epoch = 0; epoch < maxEpoch; ++epoch)
            {
                // For each epoch, we will iterate through all 
                // preference relations of all users

                // Loop through each user
                foreach (var pair in PR_train.PreferenceRelationsByUser)
                {
                    int indexOfUser = pair.Key;
                    SparseMatrix preferenceRelationsOfUser = pair.Value;

                    // For each preference relation of this user, update the latent feature vectors
                    foreach (var entry in preferenceRelationsOfUser.EnumerateIndexed(Zeros.AllowSkip))
                    {
                        int indexOfItem_i = entry.Item1;
                        int indexOfItem_j = entry.Item2;
                        //Console.WriteLine(preferenceRelationsOfUser[indexOfItem_i, indexOfItem_j]);
                        //Console.WriteLine(preferenceRelationsOfUser[indexOfItem_j, indexOfItem_i]);
                        if (indexOfItem_i >= indexOfItem_j) continue;

                        // Warning: here we need to convert the customized preference indicators
                        // from 1,2,3 into 0,0.5,1 to match the scale of the predicted pi, which is in range [0,1]
                        double prefRelation_uij = 0;
                        if(entry.Item3 == Config.Preferences.Preferred){prefRelation_uij = 1.0;}
                        else if (entry.Item3 == Config.Preferences.EquallyPreferred){prefRelation_uij = 0.5;}
                        else if (entry.Item3 == Config.Preferences.LessPreferred){prefRelation_uij = 0.0;}
                        else{Debug.Assert(false, "Should not be here.");}
                        
                        // TODO: Maybe it can be faster to do two dot products to remove the subtraction (would lose the sparse property, I think)
                        double PQ_ui = P[indexOfUser].DotProduct(Q[indexOfItem_i]);
                        double PQ_uj = P[indexOfUser].DotProduct(Q[indexOfItem_j]);
                        double estimate_uij = PQ_ui - PQ_uj;
                        //double estimate_uij = P.Row(indexOfUser).DotProduct(Q.Column(indexOfItem_i) - Q.Column(indexOfItem_j));   // Eq. 2
                        
                        
                        double exp_estimate_uij = Math.Exp(estimate_uij);   // numerator in Eq. 2
                        double normalized_estimate_uij = SpecialFunctions.InverseLogit(estimate_uij);   // pi_uij in paper
                        
                        //Debug.Assert(prefRelation_uij >= 0 && prefRelation_uij <= 1);
                        //Debug.Assert(normalized_estimate_uij >= 0 && normalized_estimate_uij <= 1);


                        // The error term in Eq. 6-9. Note that the author's paper incorrectly puts a power on the error
                        double e_uij = prefRelation_uij - normalized_estimate_uij;
                        //double e_uij = Math.Pow(prefRelation_uij - normalized_estimate_uij, 2);  // from Eq. 3&6
                        // Chain rule: e_uij times sigma'(estimate), using sigma'(x) = sigma(x) / (1 + e^x)
                        double e_uij_derivative = (e_uij * normalized_estimate_uij) / (1 + exp_estimate_uij);

                        // Update feature vectors
                        Vector<double> P_u = P[indexOfUser];
                        Vector<double> Q_i = Q[indexOfItem_i];
                        Vector<double> Q_j = Q[indexOfItem_j];
                        Vector<double> Q_ij = Q_i - Q_j;
   
                        // Eq. 7, note that the author's paper incorrectly writes + regularization
                        P[indexOfUser] += Q_ij.Multiply(e_uij_derivative * learnRate) - P_u.Multiply(regularizationOfUser * learnRate);

                        // Shared term for the item updates, built from the pre-update user vector
                        Vector<double> P_u_derivative = P_u.Multiply(e_uij_derivative * learnRate);

                        // Eq. 8, note that the author's paper incorrectly writes + regularization
                        Q[indexOfItem_i] += (P_u_derivative - Q_i.Multiply(regularizationOfItem * learnRate));

                        // Eq. 9, note that the author's paper incorrectly writes + regularization
                        Q[indexOfItem_j] -= (P_u_derivative - Q_j.Multiply(regularizationOfItem * learnRate));

                        // Recompute the estimate after the update (kept for debugging only)
                        double estimate_uij_updated = P[indexOfUser].DotProduct(Q[indexOfItem_i] - Q[indexOfItem_j]);   // Eq. 2
                        double exp_estimate_uij_updated = Math.Exp(estimate_uij_updated);   // numerator in Eq. 2
                        double normalized_estimate_uij_updated = SpecialFunctions.InverseLogit(estimate_uij_updated);   // pi_uij in paper
                        double e_uij_updated = prefRelation_uij - normalized_estimate_uij_updated;

                        //double debug1 = Math.Abs(e_uij) - Math.Abs(e_uij_updated);
                        //Debug.Assert(debug1 > 0);    // After the update the error should be smaller

                        #region Loop version of gradient update
                        /*
                        for (int k = 0; k < factorCount; ++k)
                        {
                            double factorOfUser = P[indexOfUser, k];
                            double factorOfItem_i = Q[k, indexOfItem_i];
                            double factorOfItem_j = Q[k, indexOfItem_j];

                            // TODO: Separate user/item regularization coefficients
                            P[indexOfUser, k] += learnRate * (e_uij * normalized_estimate_uij * factorOfUser - regularization * factorOfUser);
                            // Two items are updated in different directions
                            Q[k, indexOfItem_i] += learnRate * (normalized_estimate_uij * factorOfItem_i - regularization * factorOfItem_i);
                            // Two items are updated in different directions
                            Q[k, indexOfItem_j] -= learnRate * (normalized_estimate_uij * factorOfItem_j - regularization * factorOfItem_j);
                        }
                        */
                        #endregion
                    }
                }

                // Compute and display the current regularized error to see if it converges.
                // (The commented-out condition would check only a few sampled epochs instead of every one.)
                double currentErrorSum = 0;
                //if (epoch == 0 || epoch == maxEpoch - 1 || epoch % (int)Math.Ceiling(maxEpoch * 0.1) == 4)
                if (true)
                {
                    double eSum = 0;
                    foreach (var pair in PR_train.PreferenceRelationsByUser)
                    {
                        int indexOfUser = pair.Key;
                        SparseMatrix preferenceRelationsOfUser = pair.Value;

                        // For each preference relation of this user, accumulate the squared error
                        foreach (var entry in preferenceRelationsOfUser.EnumerateIndexed(Zeros.AllowSkip))
                        {
                            int indexOfItem_i = entry.Item1;
                            int indexOfItem_j = entry.Item2;

                            if (indexOfItem_i >= indexOfItem_j) continue;

                            double prefRelation_uij = 0;
                            if (entry.Item3 == Config.Preferences.Preferred) { prefRelation_uij = 1.0; }
                            else if (entry.Item3 == Config.Preferences.EquallyPreferred) { prefRelation_uij = 0.5; }
                            else if (entry.Item3 == Config.Preferences.LessPreferred) { prefRelation_uij = 0.0; }
                            else { Debug.Assert(false, "Should not be here."); }

                            // TODO: it might be faster to do two dot products and avoid building Q_i - Q_j
                            double estimate_uij = P[indexOfUser].DotProduct(Q[indexOfItem_i] - Q[indexOfItem_j]);   // Eq. 2
                            double normalized_estimate_uij = SpecialFunctions.InverseLogit(estimate_uij);   // pi_uij in paper
                            eSum += Math.Pow((prefRelation_uij - normalized_estimate_uij), 2);  // Sum the error of this preference relation

                            // Sum the regularization term
                            //for (int k = 0; k < factorCount; ++k)
                            // {
                            //     eSum += (regularizationOfUser * 0.5) * (Math.Pow(P[indexOfUser, k], 2)
                            //         + Math.Pow(Q[k, indexOfItem_i], 2) + Math.Pow(Q[k, indexOfItem_j], 2));
                            // }
                        }
                    }
                    double regularizationPenalty = regularizationOfUser * P.Sum(x => x.SquaredSum());
                    regularizationPenalty += regularizationOfItem * Q.Sum(x => x.SquaredSum());
                    eSum += regularizationPenalty;

                    // Record the current error
                    currentErrorSum = eSum;

                    Utils.PrintEpoch("Epoch", epoch, maxEpoch, "Learning error", eSum.ToString("0.0"), true);
                    // Stop the learning if the improvement in regularized error falls below a threshold
                    if (previousErrorSum - currentErrorSum < 0.0001)
                    {
                        Console.WriteLine("Improvement less than 0.0001, learning stopped.");
                        break;
                    }
                    previousErrorSum = currentErrorSum;
                }
            }

        }
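The update rules above hinge on the logistic link of Eq. 2: pi_uij = sigma(P_u . (Q_i - Q_j)), whose derivative satisfies sigma'(x) = sigma(x) / (1 + e^x). The following is a minimal, self-contained sketch of one per-relation SGD step using plain arrays instead of Math.NET vectors; the method names and the simplified parameter handling are illustrative, not part of the library.

        // Hedged sketch: one SGD step on a single preference relation,
        // mirroring Eq. 6-9 above (with the sign corrections already applied).
        static double InverseLogit(double x) { return 1.0 / (1.0 + Math.Exp(-x)); }

        static void SgdStep(double[] p, double[] qi, double[] qj,
                            double target, double learnRate, double regUser, double regItem)
        {
            // estimate = P_u . (Q_i - Q_j), Eq. 2
            double estimate = 0;
            for (int f = 0; f < p.Length; f++) { estimate += p[f] * (qi[f] - qj[f]); }

            double pi = InverseLogit(estimate);             // pi_uij
            double e = target - pi;                         // error term, no square
            double g = e * pi / (1 + Math.Exp(estimate));   // e_uij_derivative

            for (int f = 0; f < p.Length; f++)
            {
                double pf = p[f];                           // pre-update user factor
                p[f]  += learnRate * (g * (qi[f] - qj[f]) - regUser * pf);
                qi[f] += learnRate * (g * pf - regItem * qi[f]);
                qj[f] -= learnRate * (g * pf - regItem * qj[f]);
            }
        }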
Example #15
0
        public static PrefRelations PredictPrefRelations(PrefRelations PR_train, SparseMatrix PR_unknown,
            int maxEpoch, double learnRate, double regularizationOfUser, double regularizationOfItem, int factorCount)
        {
            // Latent features
            List<Vector<double>> P;
            List<Vector<double>> Q;
            //Matrix<double> P;
            //Matrix<double> Q;

            LearnLatentFeatures(PR_train, maxEpoch, learnRate, regularizationOfUser, regularizationOfItem, factorCount, out P, out Q);

            PrefRelations PR_predicted = new PrefRelations(PR_train.ItemCount);

            Object lockMe = new Object();
            Parallel.ForEach(PR_unknown.EnumerateRowsIndexed(), user =>
            {
                int indexOfUser = user.Item1;
                Vector<double> unknownPreferencesOfUser = user.Item2;
                SparseMatrix predictedPreferencesOfUser = new SparseMatrix(PR_train.ItemCount, PR_train.ItemCount);

                // Predict each unknown preference
                foreach(var unknownPreference in unknownPreferencesOfUser.EnumerateIndexed(Zeros.AllowSkip))
                {
                    int indexOfItem_i = unknownPreference.Item1;
                    int indexOfItem_j = (int)unknownPreference.Item2;
                    double estimate_uij = P[indexOfUser].DotProduct(Q[indexOfItem_i] - Q[indexOfItem_j]);   // Eq. 2
                    double normalized_estimate_uij = Core.SpecialFunctions.InverseLogit(estimate_uij);   // pi_uij in paper
                    predictedPreferencesOfUser[indexOfItem_i, indexOfItem_j] = normalized_estimate_uij;
                }

                lock(lockMe)
                {
                    PR_predicted[indexOfUser] = predictedPreferencesOfUser;
                }
            });

            return PR_predicted;
        }
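For context, a hypothetical call site for the example above could look like the following; PR_train and PR_unknown are assumed to be already populated, and the hyper-parameter values are placeholders rather than tuned settings.

        PrefRelations PR_predicted = PredictPrefRelations(
            PR_train,      // known preference relations used for training
            PR_unknown,    // sparse matrix marking which (user, item-pair) entries to predict
            100,           // maxEpoch
            0.01,          // learnRate
            0.05,          // regularizationOfUser
            0.05,          // regularizationOfItem
            10);           // factorCount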
Example #16
0
        private static double cosinePR(PrefRelations PR, int u_a, int u_b)
        {
            SparseMatrix pr_a = PR[u_a];
            SparseMatrix pr_b = PR[u_b];

            //Debug.Assert(pr_a.Trace() == SparseMatrix.Zero, "The diagonal of user preference relation matrix should be left empty.");
            //Debug.Assert(pr_b.Trace() == SparseMatrix.Zero, "The diagonal of user preference relation matrix should be left empty.");

            // The number of preference relations agreed between users a and b
            int agreedCount = pr_a.Fold2((count, prefOfA, prefOfB) =>
                    count + (prefOfA == prefOfB ? 1 : 0), 0, pr_b, Zeros.AllowSkip);

            #region Obsolete naive implementation
            /*
            // TODO: there should be a faster lambda way of doing this 
            // Loop through all non-zero elements
            foreach (Tuple<int, int, double> element in pr_a.EnumerateIndexed(Zeros.AllowSkip))
            {
                int item_i = element.Item1;
                int item_j = element.Item2;
                double preference_a = element.Item3;
                // Because pr_ij is just the reverse of pr_ji,
                // we count only i-j to avoid double counting
                // and also reduce the number of calling pr_b[]
                if (item_i > item_j)
                {
                    if (preference_a == pr_b[item_i, item_j])
                    {
                        ++agreedCount;
                    }
                }
            }
            */
            #endregion

            // The multiplication result can be too large and overflow,
            // therefore we take Sqrt() first and then multiply
            double normalization = Math.Sqrt((double)pr_a.NonZerosCount) * Math.Sqrt((double)pr_b.NonZerosCount);

            // Agreement count over the geometric mean of the two relation counts; typically a very small value
            return agreedCount / normalization;
        }
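As a quick numeric check of the normalization above (values made up for illustration): if user a stores 4 relations, user b stores 9, and they agree on 3 of them, the similarity is 3 / (sqrt(4) * sqrt(9)) = 0.5.

        int agreedCount = 3;                                      // relations on which a and b agree
        double normalization = Math.Sqrt(4.0) * Math.Sqrt(9.0);   // sqrt(nnz_a) * sqrt(nnz_b) = 6
        double similarity = agreedCount / normalization;          // 0.5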
Example #17
0
        // We need to directly compute the position matrix because the PR would be too big to fit into memory
        public static SparseMatrix PredictPrefRelations(PrefRelations PR_train, Dictionary <int, List <int> > PR_unknown,
                                                        int maxEpoch, double learnRate, double regularizationOfUser, double regularizationOfItem, int factorCount, List <double> quantizer)
        {
            // Latent features
            List <Vector <double> > P;
            List <Vector <double> > Q;

            //Matrix<double> P;
            //Matrix<double> Q;


            //SparseMatrix positionMatrix = new SparseMatrix(PR_train.UserCount, PR_train.ItemCount);
            Vector <double>[] positionMatrixCache = new Vector <double> [PR_train.UserCount];
            LearnLatentFeatures(PR_train, maxEpoch, learnRate, regularizationOfUser, regularizationOfItem, factorCount, out P, out Q);

            //PrefRelations PR_predicted = new PrefRelations(PR_train.ItemCount);

            Object lockMe = new Object();

            Parallel.ForEach(PR_unknown, user =>
            {
                Utils.PrintEpoch("Epoch", user.Key, PR_unknown.Count);
                int indexOfUser = user.Key;
                List <int> unknownItemsOfUser = user.Value;
                //SparseMatrix predictedPreferencesOfUser = new SparseMatrix(PR_train.ItemCount, PR_train.ItemCount);
                List <Tuple <int, int, double> > predictedPreferencesOfUserCache = new List <Tuple <int, int, double> >();

                // Predict each unknown preference
                foreach (int indexOfItem_i in unknownItemsOfUser)
                {
                    foreach (int indexOfItem_j in unknownItemsOfUser)
                    {
                        if (indexOfItem_i == indexOfItem_j)
                        {
                            continue;
                        }
                        double estimate_uij            = P[indexOfUser].DotProduct(Q[indexOfItem_i] - Q[indexOfItem_j]); // Eq. 2
                        double normalized_estimate_uij = Core.SpecialFunctions.InverseLogit(estimate_uij);               // pi_uij in paper
                        predictedPreferencesOfUserCache.Add(new Tuple <int, int, double>(indexOfItem_i, indexOfItem_j, normalized_estimate_uij));
                        //predictedPreferencesOfUser[indexOfItem_i, indexOfItem_j] = normalized_estimate_uij;
                    }
                }

                // Note: performance is better when we do not quantize here

                /*
                 * DataMatrix predictedPreferencesOfUser =
                 *  new DataMatrix(SparseMatrix.OfIndexed(PR_train.ItemCount, PR_train.ItemCount, predictedPreferencesOfUserCache));
                 * predictedPreferencesOfUser.Quantization(0, 1.0, quantizer);
                 * Vector<double> positionsOfUser = PrefRelations.PreferencesToPositions(predictedPreferencesOfUser.Matrix);
                 */

                double[] positionByItem = new double[PR_train.ItemCount];
                foreach (var triplet in predictedPreferencesOfUserCache)
                {
                    int indexOfItem_i = triplet.Item1;
                    int indexOfItem_j = triplet.Item2;
                    double preference = triplet.Item3;
                    if (preference > 0.5)
                    {
                        positionByItem[indexOfItem_i]++;
                        positionByItem[indexOfItem_j]--;
                    }
                    else if (preference < 0.5)
                    {
                        positionByItem[indexOfItem_i]--;
                        positionByItem[indexOfItem_j]++;
                    }
                }

                // Each item appears in 2 * (n - 1) ordered pairs, which bounds |position| by 1
                int normalizationTerm = unknownItemsOfUser.Count * 2 - 2;
                for (int i = 0; i < positionByItem.Length; i++)
                {
                    if (positionByItem[i] != 0)
                    {
                        positionByItem[i] /= normalizationTerm;
                    }
                }

                Vector <double> positionsOfUser = SparseVector.OfEnumerable(positionByItem);

                lock (lockMe)
                {
                    positionMatrixCache[indexOfUser] = positionsOfUser;
                    //positionMatrix.SetRow(indexOfUser, positionsOfUser);
                    //PR_predicted[indexOfUser] = predictedPreferencesOfUser;
                }
            });

            return(SparseMatrix.OfRowVectors(positionMatrixCache));
        }
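The win/loss accumulation above is the heart of the position computation: a predicted pi_uij above 0.5 counts as a win for item i and a loss for item j, and the balance is divided by 2 * (n - 1), the number of ordered pairs each item appears in, so positions land in [-1, 1]. A self-contained sketch with a dense pairwise matrix (the method name and dense layout are illustrative only):

        static double[] PositionsFromPreferences(double[,] pi, int n)
        {
            double[] position = new double[n];
            for (int i = 0; i < n; i++)
            {
                for (int j = 0; j < n; j++)
                {
                    if (i == j) { continue; }
                    if (pi[i, j] > 0.5) { position[i]++; position[j]--; }
                    else if (pi[i, j] < 0.5) { position[i]--; position[j]++; }
                }
            }
            int normalization = 2 * n - 2;   // each item appears in 2 * (n - 1) ordered pairs
            for (int i = 0; i < n; i++) { position[i] /= normalization; }
            return position;
        }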
Example #18
0
        public static Dictionary <int, List <int> > RecommendTopN(PrefRelations PR_train, int K, List <int> targetUsers, int topN)
        {
            Dictionary <int, List <int> > topNItemsByUser = new Dictionary <int, List <int> >(targetUsers.Count);

            int          userCount      = PR_train.UserCount;
            int          itemCount      = PR_train.ItemCount;
            SparseMatrix positionMatrix = PR_train.GetPositionMatrix();

            // Make recommendations to each target user
            foreach (int indexOfUser in targetUsers)
            {
                Utils.PrintEpoch("Current user/total", indexOfUser, targetUsers.Count);

                // TODO: should have a default list of popular items in case of cold users
                Dictionary <int, double> topNItems                = new Dictionary <int, double>(topN); // To store recommendations for indexOfUser
                Dictionary <int, double> topKNeighbors            = KNNCore.GetTopKNeighborsByUser(PR_train.UserSimilarities, indexOfUser, K);
                SparseVector             predictedPositionsOfUser = new SparseVector(itemCount);

                // Compute the predicted position of each item for indexOfUser
                for (int indexOfItem = 0; indexOfItem < itemCount; ++indexOfItem)
                {
                    // Compute the position of this item for the user
                    // by combining neighbors' positions on this item
                    double weightedSum   = 0;
                    double weightSum     = 0;
                    int    itemSeenCount = 0;
                    foreach (KeyValuePair <int, double> neighbor in topKNeighbors)
                    {
                        int    indexOfNeighbor        = neighbor.Key;
                        double similarityOfNeighbor   = neighbor.Value;
                        double itemPositionOfNeighbor = positionMatrix[indexOfNeighbor, indexOfItem];

                        // TODO: Zero means it is not seen by the neighbor but
                        // it may also be the position value of 0
                        if (itemPositionOfNeighbor != 0)
                        {
                            weightSum   += similarityOfNeighbor;
                            weightedSum += itemPositionOfNeighbor * similarityOfNeighbor;
                            itemSeenCount++;
                        }
                    }

                    // If any neighbor has seen this item
                    if (itemSeenCount != 0)
                    {
                        // TODO: Add user mean may improve the performance
                        predictedPositionsOfUser[indexOfItem] = weightedSum / weightSum;
                    }
                }
                List <int> indexesOfItemSortedByPosition = Enumerable.Range(0, itemCount).ToList();

                Sorting.Sort(predictedPositionsOfUser, indexesOfItemSortedByPosition);
                indexesOfItemSortedByPosition.Reverse(); // Make it descending order by position
                // Add the top N items for this user
                topNItemsByUser[indexOfUser] = indexesOfItemSortedByPosition.GetRange(0, topN);
            }

            return(topNItemsByUser);

            #region Old version

            /*
             * //===============Initialize variables==================
             *
             * // Recommendations are stored here indexed by user id
             * Dictionary<int, List<int>> userRecommendations = new Dictionary<int, List<int>>(targetUsers.Count);
             *
             * int userCount = PR_train.UserCount;
             * int itemCount = PR_train.ItemCount;
             *
             * // Build the item position matrix
             * // each element indicates the position(kind of goodness) of an item to the user
             * SparseMatrix itemPositions = new SparseMatrix(userCount, itemCount);
             *
             * Object lockMe = new Object();
             * Parallel.ForEach(PR_train.GetAllPreferenceRelations, pair =>
             * {
             *  int uid = pair.Key;
             *  Utilities.PrintEpoch("Current user/total", uid, userCount);
             *  SparseMatrix userPreferences = pair.Value;
             *  foreach (Tuple<int, Vector<double>> preferences in userPreferences.EnumerateRowsIndexed())
             *  {
             *      int iid = preferences.Item1;
             *      SparseVector iidPreferences = SparseVector.OfVector(preferences.Item2);
             *      // The number of items that are preferred to item iid
             *      int preferredCount = 0;
             *      // The number of items that are less preferred to item iid
             *      int lessPreferredCount = 0;
             *      // The number of items (other than item iid) that are equally preferred to item iid
             *      // TODO: I'm not sure if we should count unknown preferences or not?
             *      int equallyPreferredCount = 0;
             *
             *      // Note: don't use the Count() method it won't skip Zeros
             *      foreach (double preference in iidPreferences.Enumerate(Zeros.AllowSkip))
             *      {
             *          if (preference == Config.Preferences.Preferred)
             *              ++preferredCount;
             *          else if (preference == Config.Preferences.LessPreferred)
             *              ++lessPreferredCount;
             *          else if (preference == Config.Preferences.EquallyPreferred)
             *              ++equallyPreferredCount;
             *          else { Debug.Assert(false, "We should not see any non-match value here."); }
             *      }
             *
             *      double position = ((double)lessPreferredCount - preferredCount) / (preferredCount + lessPreferredCount + equallyPreferredCount);
             *
             *      Debug.Assert(position >= -1 && position <= 1);  // According to the paper
             *      if (position == 0) { Debug.Assert(preferredCount == lessPreferredCount); }  // According to the paper
             *
             *      lock (lockMe)
             *      {
             *          itemPositions[uid, iid] = position;
             *      }
             *  }
             * });
             *
             * // Need to cache the items appeared in each user's profile
             * // as we won't consider unseen items as recommendations
             * Dictionary<int, List<int>> seenItemsByUser = PR_train.GetSeenItemsByUser();
             *
             * Matrix positionMatrix = PR_train.GetPositionMatrix();
             *
             * Console.WriteLine("Recommending user/total");
             *
             * // Make recommendations for each target user
             * foreach (int uid in targetUsers)
             * {
             *
             *  Utilities.PrintEpoch("Current user/total", uid, targetUsers.Count);
             *
             *  // TODO: should have a default list of popular items in case of cold users
             *  Dictionary<int, double> topN = new Dictionary<int, double>(topNCount);   // To store recommendations for user uid
             *
             *  Dictionary<int, double> topK = KNNCore.GetTopK(PR_train.UserSimilarities, uid, K);
             *
             *  // Get a list of all candidate items
             *  List<int> candidateItems = new List<int>();
             *  foreach (int uid_neighbor in topK.Keys)
             *  {
             *      // TODO: union will remove duplicates, seems to be expensive here
             *      candidateItems = candidateItems.Union(seenItemsByUser[uid_neighbor]).ToList();
             *  }
             *
             *  // Loop through all candidate items
             *  double minPosition = double.MinValue;
             *  int min_iid = int.MinValue;
             *  foreach (int iid in candidateItems)
             *  {
             *      // Compute the average position on item iid given
             *      // by the top K neighbors. Each position is weighted
             *      // by the similarity to the target user
             *      double weightedSum = 0;
             *      double weightSum = 0;
             *      foreach (KeyValuePair<int, double> neighbor in topK)
             *      {
             *          int uidNeighbor = neighbor.Key;
             *          double similarity = neighbor.Value;
             *          double iidPosition = itemPositions[uidNeighbor, iid];
             *          // TODO: check the standard KNN, we should skip the unseen items somehow!
             *          //if (neighborRating != 0)
             *          // The weightSum serves as the normalization term
             *          // it needs abs() because some metric such as Pearson
             *          // may produce negative weights
             *          weightSum += Math.Abs(similarity);
             *          weightedSum += iidPosition * similarity;
             *      }
             *
             *      double position_predicted = weightedSum / weightSum;  // TODO: add some kind of user mean to improve?
             *
             *      // TODO: should have a default list of popular items in case of cold users
             *
             *      if (topN.Count < topNCount)  // Fill the top N list until it is full
             *      {
             *          topN[iid] = position_predicted;
             *          if (topN.Count == topNCount)
             *          {
             *              // Find the item with least position when we have N items in the list
             *              min_iid = topN.Aggregate((l, r) => l.Value < r.Value ? l : r).Key;
             *              minPosition = topN[min_iid];
             *          }
             *      }
             *      else if (position_predicted > minPosition)
             *      {
             *          // Replace the least similar neighbor
             *          topN.Remove(min_iid);
             *          topN[iid] = position_predicted;
             *
             *          // Find the item with least position
             *          min_iid = topN.Aggregate((l, r) => l.Value < r.Value ? l : r).Key;
             *          minPosition = topN[min_iid];
             *      }
             *  }
             *  // Add the top N items for user uid
             *  userRecommendations[uid] = topN.Keys.ToList();
             * }
             *
             * return userRecommendations;
             */
            #endregion
        }
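A hypothetical call site for the recommender above; PR_train.UserSimilarities is assumed to be precomputed (for example with the preference-relation cosine of Example #16), and the K and topN values are placeholders.

        List<int> targetUsers = Enumerable.Range(0, PR_train.UserCount).ToList();
        Dictionary<int, List<int>> recommendations =
            RecommendTopN(PR_train, 50 /* K neighbors */, targetUsers, 10 /* topN */);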