public static void GetCosineOfPrefRelations(PrefRelations PR, int maxCountOfNeighbors,
    double strongSimilarityThreshold, out SimilarityData neighborsByObject)
{
    // The strong-similarity indicators are not needed by callers of this
    // overload, so they are discarded after the computation
    HashSet<Tuple<int, int>> discardedIndicators;
    ComputeSimilarities(PR, SimilarityMetric.CosinePrefRelations, maxCountOfNeighbors,
        strongSimilarityThreshold, out neighborsByObject, out discardedIndicators);
}
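// Usage sketch (hypothetical, added for illustration): computing user-user
// similarities over preference relations. The values 200 and 0.3 are
// illustrative; the repo's own MaxCountOfNeighbors and
// StrongSimilarityThreshold settings are used in GetReadyForOrdinal below.
private static SimilarityData ExampleGetUserSimilarities(PrefRelations PR_train)
{
    SimilarityData userSimilarities;
    Metric.GetCosineOfPrefRelations(PR_train, 200, 0.3, out userSimilarities);
    return userSimilarities;
}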
public static DataMatrix PredictRatings(PrefRelations PR_train, DataMatrix R_unknown,
    int maxEpoch, double learnRate, double regularizationOfUser,
    double regularizationOfItem, int factorCount)
{
    // Latent features
    List<Vector<double>> P;
    List<Vector<double>> Q;

    LearnLatentFeatures(PR_train, maxEpoch, learnRate, regularizationOfUser,
        regularizationOfItem, factorCount, out P, out Q);

    List<Tuple<int, int, double>> R_predicted_cache = new List<Tuple<int, int, double>>();
    foreach (var data in R_unknown.Matrix.EnumerateIndexed(Zeros.AllowSkip))
    {
        int indexOfUser = data.Item1;
        int indexOfItem = data.Item2;
        R_predicted_cache.Add(new Tuple<int, int, double>(indexOfUser, indexOfItem,
            P[indexOfUser].DotProduct(Q[indexOfItem])));
    }

    DataMatrix R_predicted = new DataMatrix(SparseMatrix.OfIndexed(
        R_unknown.UserCount, R_unknown.ItemCount, R_predicted_cache));
    //new DataMatrix(R_unknown.Matrix.PointwiseMultiply(P.Multiply(Q)));

    // TODO: Should the result be normalized into [0,1]? Note that mapping
    // through the inverse logit would also convert zero entries into 0.5
    //R_predicted.Matrix.MapInplace(x => RecSys.Core.SpecialFunctions.InverseLogit(x), Zeros.AllowSkip);
    return R_predicted;
}
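// Usage sketch (hypothetical): end-to-end PrefNMF rating prediction. All
// hyperparameter values below are illustrative, not tuned.
private static DataMatrix ExamplePredictRatingsWithPrefNMF(DataMatrix R_train, DataMatrix R_unknown)
{
    // Convert ratings into preference relations, as in GetReadyForOrdinal
    PrefRelations PR_train = PrefRelations.CreateDiscrete(R_train);
    return PredictRatings(PR_train, R_unknown,
        maxEpoch: 100, learnRate: 0.01,
        regularizationOfUser: 0.05, regularizationOfItem: 0.05,
        factorCount: 10);
}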
/// <summary>
/// Switch between different similarity metrics.
/// </summary>
/// <param name="PR">Preference relations of all users.</param>
/// <param name="similarityMetric">The metric used to compare two users.</param>
/// <param name="maxCountOfNeighbors">Maximum number of neighbors kept per object.</param>
/// <param name="strongSimilarityThreshold">Similarities above this value are recorded as strong indicators.</param>
/// <param name="neighborsByObject">The top neighbors of each object, sorted by similarity.</param>
/// <param name="strongSimilarityIndicators">Pairs of objects whose similarity exceeds the threshold.</param>
private static void ComputeSimilarities(PrefRelations PR,
    Metric.SimilarityMetric similarityMetric, int maxCountOfNeighbors,
    double strongSimilarityThreshold, out SimilarityData neighborsByObject,
    out HashSet<Tuple<int, int>> strongSimilarityIndicators)
{
    int dimension = PR.UserCount;
    HashSet<Tuple<int, int>> strongSimilarityIndicators_out = new HashSet<Tuple<int, int>>();
    SimilarityData neighborsByObject_out = new SimilarityData(maxCountOfNeighbors);

    // Compute similarity for the lower triangular only and mirror it,
    // since the metric is symmetric
    Object lockMe = new Object();
    Parallel.For(0, dimension, i =>
    {
        Utils.PrintEpoch("Progress current/total", i, dimension);

        for (int j = 0; j < dimension; j++)
        {
            if (i == j) { continue; } // Skip self similarity
            else if (i > j)
            {
                switch (similarityMetric)
                {
                    case SimilarityMetric.CosinePrefRelations:
                        double cosinePR = Metric.cosinePR(PR, i, j);
                        lock (lockMe)
                        {
                            if (cosinePR > strongSimilarityThreshold)
                            {
                                strongSimilarityIndicators_out.Add(new Tuple<int, int>(i, j));
                            }
                            neighborsByObject_out.AddSimilarityData(i, j, cosinePR);
                            neighborsByObject_out.AddSimilarityData(j, i, cosinePR);
                        }
                        break;
                    // More metrics to be added here.
                }
            }
        }
    });
    neighborsByObject = neighborsByObject_out;
    strongSimilarityIndicators = strongSimilarityIndicators_out;
}
// TODO: Scalar preference relations based on Bradley-Terry model
public static PrefRelations CreateScalar(DataMatrix R)
{
    int userCount = R.UserCount;
    int itemCount = R.ItemCount;
    PrefRelations PR = new PrefRelations(itemCount);

    // Create a preference matrix for each user
    Object lockMe = new Object();
    Parallel.ForEach(R.Users, user =>
    {
        int userIndex = user.Item1;
        RatingVector userRatings = new RatingVector(user.Item2);

        Utils.PrintEpoch("Doing user/total", userIndex, userCount);

        // The diagonal refers to the i-i item pair; it is left empty
        SparseMatrix userPreferences = new SparseMatrix(itemCount);
        //SparseMatrix.OfMatrix(Matrix.Build.SparseDiagonal(itemCount, Config.Preferences.EquallyPreferred));

        // TODO: Use Vector.Map2 to replace the following two foreach loops

        // Here we need to compare each pair of items rated by this user
        foreach (Tuple<int, double> left in userRatings.Ratings)
        {
            int leftItemIndex = left.Item1;
            double leftItemRating = left.Item2;
            foreach (Tuple<int, double> right in userRatings.Ratings)
            {
                int rightItemIndex = right.Item1;
                // TODO: We could compute only the lower triangular,
                // and the upper triangular will be a negative mirror.
                // Let's do it directly at this stage
                double rightItemRating = right.Item2;
                Debug.Assert(rightItemRating != 0 && leftItemRating != 0);

                // Skip the diagonal
                if (leftItemIndex == rightItemIndex) { continue; }

                userPreferences[leftItemIndex, rightItemIndex] =
                    0.1 * (leftItemRating - rightItemRating + 5);
                //(double)leftItemRating / (leftItemRating + rightItemRating);
            }
        }

        // Because the upper triangular should be a mirror of the lower triangular
        Debug.Assert((userPreferences.NonZerosCount).IsEven());
        double debug1 = Math.Pow(((SparseVector)R.GetRow(userIndex)).NonZerosCount, 2)
            - ((SparseVector)R.GetRow(userIndex)).NonZerosCount;
        double debug2 = userPreferences.NonZerosCount;
        Debug.Assert(debug1 == debug2);

        lock (lockMe)
        {
            // Copy similarity values from lower triangular to upper triangular
            //pr_uid = DenseMatrix.OfMatrix(pr_uid + pr_uid.Transpose() - DenseMatrix.CreateIdentity(pr_uid.RowCount));
            PR[userIndex] = userPreferences;
        }
    });

    return PR;
}
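// Worked example of the scalar preference mapping above, assuming the usual
// 1-5 rating scale (rating differences range over -4..4):
//   preference(i, j) = 0.1 * (r_i - r_j + 5)
//   r_i = 5, r_j = 1  ->  0.1 * (5 - 1 + 5) = 0.9  (i strongly preferred to j)
//   r_i = 3, r_j = 3  ->  0.1 * (3 - 3 + 5) = 0.5  (equally preferred)
//   r_i = 1, r_j = 5  ->  0.1 * (1 - 5 + 5) = 0.1  (j strongly preferred to i)
// Note that preference(i, j) + preference(j, i) = 1 for every stored pair,
// which is why the NonZerosCount assertions above expect an even, mirrored count.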
public string GetReadyForOrdinal(bool saveLoadedData = true)
{
    if (!ReadyForNumerical) { GetReadyForNumerical(); }
    if (ReadyForOrdinal) { return "Is ready."; }

    StringBuilder log = new StringBuilder();
    Utils.StartTimer();

    log.AppendLine(Utils.PrintHeading("Prepare preference relation data"));

    Console.WriteLine("Converting R_train into PR_train");
    log.AppendLine("Converting R_train into PR_train");
    PR_train = PrefRelations.CreateDiscrete(R_train);

    //Console.WriteLine("Converting R_test into PR_test");
    //log.AppendLine("Converting R_test into PR_test");
    //PR_test = PrefRelations.CreateDiscrete(R_test);

    log.AppendLine(Utils.StopTimer());

    #region Prepare similarity data
    if (File.Exists(GetDataFileName("USP"))
        && File.Exists(GetDataFileName("ISP"))
        && File.Exists(GetDataFileName("SSIIP")))
    {
        Utils.StartTimer();
        Utils.PrintHeading("Load user, item, indicators variables (Pref based)");
        UserSimilaritiesOfPref = Utils.IO<SimilarityData>.LoadObject(GetDataFileName("USP"));
        ItemSimilaritiesOfPref = Utils.IO<SimilarityData>.LoadObject(GetDataFileName("ISP"));
        StrongSimilarityIndicatorsByItemPref = Utils.IO<HashSet<Tuple<int, int>>>.LoadObject(GetDataFileName("SSIIP"));
        Utils.StopTimer();
    }
    else
    {
        Utils.StartTimer();
        Utils.PrintHeading("Compute user-user similarities (Pref based)");
        Metric.GetCosineOfPrefRelations(PR_train, MaxCountOfNeighbors,
            StrongSimilarityThreshold, out UserSimilaritiesOfPref);
        Utils.StopTimer();

        // For the moment, we use user-wise preferences to compute item-item
        // similarities; it is not the same as user-user pref similarities
        Utils.StartTimer();
        Utils.PrintHeading("Compute item-item similarities (Pref based)");
        DataMatrix PR_userwise_preferences = new DataMatrix(PR_train.GetPositionMatrix());
        Metric.GetPearsonOfColumns(PR_userwise_preferences, MaxCountOfNeighbors,
            StrongSimilarityThreshold, out ItemSimilaritiesOfPref,
            out StrongSimilarityIndicatorsByItemPref);
        Utils.StopTimer();

        if (saveLoadedData)
        {
            Utils.IO<SimilarityData>.SaveObject(UserSimilaritiesOfPref, GetDataFileName("USP"));
            Utils.IO<SimilarityData>.SaveObject(ItemSimilaritiesOfPref, GetDataFileName("ISP"));
            Utils.IO<HashSet<Tuple<int, int>>>.SaveObject(StrongSimilarityIndicatorsByItemPref,
                GetDataFileName("SSIIP"));
        }
        Utils.StopTimer();
    }
    #endregion

    ReadyForOrdinal = true;
    return log.ToString();
}
public static DataMatrix PredictRatings(PrefRelations PR_train, DataMatrix R_unknown,
    int K, SimilarityData neighborsByUser)
{
    Debug.Assert(PR_train.UserCount == R_unknown.UserCount);
    Debug.Assert(PR_train.ItemCount == R_unknown.ItemCount);

    // This matrix stores predictions
    DataMatrix R_predicted = new DataMatrix(R_unknown.UserCount, R_unknown.ItemCount);

    // This can be considered as the R_train in standard UserKNN
    SparseMatrix positionMatrix = PR_train.GetPositionMatrix();
    DataMatrix ratingMatrixFromPositions = new DataMatrix(positionMatrix);

    Vector<double> meanByUser = ratingMatrixFromPositions.GetUserMeans();
    Vector<double> meanByItem = ratingMatrixFromPositions.GetItemMeans();
    double globalMean = ratingMatrixFromPositions.GetGlobalMean();

    // Predict positions for each test user.
    // Appears to be very fast; Parallel.ForEach is unnecessary
    foreach (Tuple<int, Vector<double>> user in R_unknown.Users)
    {
        int indexOfUser = user.Item1;
        Vector<double> indexesOfUnknownRatings = user.Item2;

        Utils.PrintEpoch("Predicting user/total", indexOfUser, PR_train.UserCount);

        // Note that there are more than K neighbors in the list (sorted by
        // similarity): we will use the top K neighbors WHO HAVE RATED THE ITEM.
        // For example, we may keep 200 top neighbors and hope that at least K
        // of them have rated the item. We can't keep everyone in the neighbor
        // list because there are too many for large data sets
        var topNeighborsOfUser = neighborsByUser[indexOfUser];

        double meanOfUser = meanByUser[indexOfUser];

        // Loop through each position to be predicted
        foreach (Tuple<int, double> unknownRating in indexesOfUnknownRatings.EnumerateIndexed(Zeros.AllowSkip))
        {
            int indexOfUnknownItem = unknownRating.Item1;

            // Compute the position of this item for the user
            // by combining neighbors' positions on this item
            double weightedSum = 0;
            double weightSum = 0;
            int currentTopKCount = 0;
            foreach (KeyValuePair<int, double> neighbor in topNeighborsOfUser)
            {
                int indexOfNeighbor = neighbor.Key;
                double similarityOfNeighbor = neighbor.Value;
                double itemPositionOfNeighbor =
                    ratingMatrixFromPositions[indexOfNeighbor, indexOfUnknownItem];

                // We count only if the neighbor has seen this item before
                if (itemPositionOfNeighbor != 0)
                {
                    // Recall that we use a constant to hold position value 0;
                    // we revert it back here
                    if (itemPositionOfNeighbor == Config.ZeroInSparseMatrix)
                    {
                        Debug.Assert(false, "By using the PositionShift constant, we should not be in here.");
                        itemPositionOfNeighbor = 0;
                    }
                    weightSum += similarityOfNeighbor;
                    weightedSum += (itemPositionOfNeighbor - meanByUser[indexOfNeighbor])
                        * similarityOfNeighbor;
                    currentTopKCount++;
                    if (currentTopKCount >= K) { break; }
                }
            }

            // If any neighbor has seen this item
            if (currentTopKCount != 0)
            {
                // TODO: Adding the user mean may improve the performance
                R_predicted[indexOfUser, indexOfUnknownItem] = meanOfUser + weightedSum / weightSum;
            }
            else
            {
                R_predicted[indexOfUser, indexOfUnknownItem] = globalMean;
            }
        }
    }

    return R_predicted;
}
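// Usage sketch (hypothetical): KNN-style prediction from preference-based
// user similarities. K = 50 and the neighbor-list settings are illustrative.
private static DataMatrix ExamplePredictRatingsWithPrefKNN(PrefRelations PR_train, DataMatrix R_unknown)
{
    SimilarityData neighborsByUser;
    Metric.GetCosineOfPrefRelations(PR_train, 200, 0.3, out neighborsByUser);
    return PredictRatings(PR_train, R_unknown, 50, neighborsByUser);
}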
public static Dictionary<int, List<int>> RecommendTopN(PrefRelations PR_train, int K,
    List<int> targetUsers, int topN)
{
    Dictionary<int, List<int>> topNItemsByUser = new Dictionary<int, List<int>>(targetUsers.Count);

    int userCount = PR_train.UserCount;
    int itemCount = PR_train.ItemCount;
    SparseMatrix positionMatrix = PR_train.GetPositionMatrix();

    // Make recommendations to each target user
    foreach (int indexOfUser in targetUsers)
    {
        Utils.PrintEpoch("Current user/total", indexOfUser, targetUsers.Count);

        // TODO: should have a default list of popular items in case of cold users
        Dictionary<int, double> topNItems = new Dictionary<int, double>(topN); // To store recommendations for indexOfUser
        Dictionary<int, double> topKNeighbors = KNNCore.GetTopKNeighborsByUser(PR_train.UserSimilarities, indexOfUser, K);
        SparseVector predictedPositionsOfUser = new SparseVector(itemCount);

        // Compute the predicted position of each item for indexOfUser
        for (int indexOfItem = 0; indexOfItem < itemCount; ++indexOfItem)
        {
            // Compute the position of this item for the user
            // by combining neighbors' positions on this item
            double weightedSum = 0;
            double weightSum = 0;
            int itemSeenCount = 0;
            foreach (KeyValuePair<int, double> neighbor in topKNeighbors)
            {
                int indexOfNeighbor = neighbor.Key;
                double similarityOfNeighbor = neighbor.Value;
                double itemPositionOfNeighbor = positionMatrix[indexOfNeighbor, indexOfItem];

                // TODO: Zero means it is not seen by the neighbor, but
                // it may also be the position value of 0
                if (itemPositionOfNeighbor != 0)
                {
                    weightSum += similarityOfNeighbor;
                    weightedSum += itemPositionOfNeighbor * similarityOfNeighbor;
                    itemSeenCount++;
                }
            }

            // If any neighbor has seen this item
            if (itemSeenCount != 0)
            {
                // TODO: Adding the user mean may improve the performance
                predictedPositionsOfUser[indexOfItem] = weightedSum / weightSum;
            }
        }

        List<int> indexesOfItemSortedByPosition = Enumerable.Range(0, itemCount).ToList();
        Sorting.Sort(predictedPositionsOfUser, indexesOfItemSortedByPosition);
        indexesOfItemSortedByPosition.Reverse(); // Make it descending order by position

        // Add the top N items for this user
        topNItemsByUser[indexOfUser] = indexesOfItemSortedByPosition.GetRange(0, topN);
    }

    return topNItemsByUser;

    #region Old version
    /*
    //===============Initialize variables==================

    // Recommendations are stored here indexed by user id
    Dictionary<int, List<int>> userRecommendations = new Dictionary<int, List<int>>(targetUsers.Count);

    int userCount = PR_train.UserCount;
    int itemCount = PR_train.ItemCount;

    // Build the item position matrix;
    // each element indicates the position (kind of goodness) of an item to the user
    SparseMatrix itemPositions = new SparseMatrix(userCount, itemCount);

    Object lockMe = new Object();
    Parallel.ForEach(PR_train.GetAllPreferenceRelations, pair =>
    {
        int uid = pair.Key;
        Utilities.PrintEpoch("Current user/total", uid, userCount);
        SparseMatrix userPreferences = pair.Value;
        foreach (Tuple<int, Vector<double>> preferences in userPreferences.EnumerateRowsIndexed())
        {
            int iid = preferences.Item1;
            SparseVector iidPreferences = SparseVector.OfVector(preferences.Item2);
            // The number of items that are preferred to item iid
            int preferredCount = 0;
            // The number of items that are less preferred to item iid
            int lessPreferredCount = 0;
            // The number of items (other than item iid) that are equally preferred to item iid
            // TODO: I'm not sure if we should count unknown preferences or not?
            int equallyPreferredCount = 0;

            // Note: don't use the Count() method, it won't skip zeros
            foreach (double preference in iidPreferences.Enumerate(Zeros.AllowSkip))
            {
                if (preference == Config.Preferences.Preferred) ++preferredCount;
                else if (preference == Config.Preferences.LessPreferred) ++lessPreferredCount;
                else if (preference == Config.Preferences.EquallyPreferred) ++equallyPreferredCount;
                else { Debug.Assert(false, "We should not see any non-match value here."); }
            }

            double position = ((double)lessPreferredCount - preferredCount)
                / (preferredCount + lessPreferredCount + equallyPreferredCount);

            Debug.Assert(position >= -1 && position <= 1); // According to the paper
            if (position == 0) { Debug.Assert(preferredCount == lessPreferredCount); } // According to the paper

            lock (lockMe)
            {
                itemPositions[uid, iid] = position;
            }
        }
    });

    // Need to cache the items appearing in each user's profile,
    // as we won't consider unseen items as recommendations
    Dictionary<int, List<int>> seenItemsByUser = PR_train.GetSeenItemsByUser();

    Matrix positionMatrix = PR_train.GetPositionMatrix();

    Console.WriteLine("Recommending user/total");

    // Make recommendations for each target user
    foreach (int uid in targetUsers)
    {
        Utilities.PrintEpoch("Current user/total", uid, targetUsers.Count);

        // TODO: should have a default list of popular items in case of cold users
        Dictionary<int, double> topN = new Dictionary<int, double>(topNCount); // To store recommendations for user uid

        Dictionary<int, double> topK = KNNCore.GetTopK(PR_train.UserSimilarities, uid, K);

        // Get a list of all candidate items
        List<int> candidateItems = new List<int>();
        foreach (int uid_neighbor in topK.Keys)
        {
            // TODO: union will remove duplicates, seems to be expensive here
            candidateItems = candidateItems.Union(seenItemsByUser[uid_neighbor]).ToList();
        }

        // Loop through all candidate items
        double minPosition = double.MinValue;
        int min_iid = int.MinValue;
        foreach (int iid in candidateItems)
        {
            // Compute the average position of item iid given by the top K
            // neighbors. Each position is weighted by the similarity to the target user
            double weightedSum = 0;
            double weightSum = 0;
            foreach (KeyValuePair<int, double> neighbor in topK)
            {
                int uidNeighbor = neighbor.Key;
                double similarity = neighbor.Value;
                double iidPosition = itemPositions[uidNeighbor, iid];
                // TODO: check the standard KNN, we should skip the unseen items somehow!
                //if (neighborRating != 0)
                // The weightSum serves as the normalization term;
                // it needs Abs() because some metrics such as Pearson
                // may produce negative weights
                weightSum += Math.Abs(similarity);
                weightedSum += iidPosition * similarity;
            }

            double position_predicted = weightedSum / weightSum; // TODO: add some kind of user mean to improve?

            // TODO: should have a default list of popular items in case of cold users

            if (topN.Count < topNCount) // Fill the top N list until it is full
            {
                topN[iid] = position_predicted;
                if (topN.Count == topNCount)
                {
                    // Find the item with the least position once we have N items in the list
                    min_iid = topN.Aggregate((l, r) => l.Value < r.Value ? l : r).Key;
                    minPosition = topN[min_iid];
                }
            }
            else if (position_predicted > minPosition)
            {
                // Replace the item with the least position
                topN.Remove(min_iid);
                topN[iid] = position_predicted;

                // Find the item with the least position
                min_iid = topN.Aggregate((l, r) => l.Value < r.Value ? l : r).Key;
                minPosition = topN[min_iid];
            }
        }
        // Add the top N items for user uid
        userRecommendations[uid] = topN.Keys.ToList();
    }

    return userRecommendations;
    */
    #endregion
}
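// Usage sketch (hypothetical): top-10 recommendations for all users with
// K = 50 neighbors. Assumes PR_train.UserSimilarities has been populated.
private static Dictionary<int, List<int>> ExampleRecommend(PrefRelations PR_train)
{
    List<int> targetUsers = Enumerable.Range(0, PR_train.UserCount).ToList();
    return RecommendTopN(PR_train, 50, targetUsers, 10);
}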
// We need to directly compute the position matrix because the full PR
// would be too big to fit into memory
public static SparseMatrix PredictPrefRelations(PrefRelations PR_train,
    Dictionary<int, List<int>> PR_unknown, int maxEpoch, double learnRate,
    double regularizationOfUser, double regularizationOfItem, int factorCount,
    List<double> quantizer)
{
    // Latent features
    List<Vector<double>> P;
    List<Vector<double>> Q;
    //Matrix<double> P;
    //Matrix<double> Q;

    //SparseMatrix positionMatrix = new SparseMatrix(PR_train.UserCount, PR_train.ItemCount);
    Vector<double>[] positionMatrixCache = new Vector<double>[PR_train.UserCount];

    LearnLatentFeatures(PR_train, maxEpoch, learnRate, regularizationOfUser,
        regularizationOfItem, factorCount, out P, out Q);

    //PrefRelations PR_predicted = new PrefRelations(PR_train.ItemCount);

    Object lockMe = new Object();
    Parallel.ForEach(PR_unknown, user =>
    {
        Utils.PrintEpoch("Epoch", user.Key, PR_unknown.Count);
        int indexOfUser = user.Key;
        List<int> unknownItemsOfUser = user.Value;
        List<Tuple<int, int, double>> predictedPreferencesOfUserCache = new List<Tuple<int, int, double>>();

        // Predict each unknown preference
        foreach (int indexOfItem_i in unknownItemsOfUser)
        {
            foreach (int indexOfItem_j in unknownItemsOfUser)
            {
                if (indexOfItem_i == indexOfItem_j) { continue; }
                double estimate_uij = P[indexOfUser].DotProduct(Q[indexOfItem_i] - Q[indexOfItem_j]); // Eq. 2
                double normalized_estimate_uij = Core.SpecialFunctions.InverseLogit(estimate_uij); // pi_uij in paper
                predictedPreferencesOfUserCache.Add(new Tuple<int, int, double>(
                    indexOfItem_i, indexOfItem_j, normalized_estimate_uij));
            }
        }

        // Note: it shows better performance to not quantize here.
        // The quantizer parameter is only used by this commented-out path.
        /*
        DataMatrix predictedPreferencesOfUser =
            new DataMatrix(SparseMatrix.OfIndexed(PR_train.ItemCount, PR_train.ItemCount, predictedPreferencesOfUserCache));
        predictedPreferencesOfUser.Quantization(0, 1.0, quantizer);
        Vector<double> positionsOfUser = PrefRelations.PreferencesToPositions(predictedPreferencesOfUser.Matrix);
        */

        // Convert pairwise preferences into positions: an item gains a point
        // for every pairwise win and loses a point for every pairwise loss
        double[] positionByItem = new double[PR_train.ItemCount];
        foreach (var triplet in predictedPreferencesOfUserCache)
        {
            int indexOfItem_i = triplet.Item1;
            int indexOfItem_j = triplet.Item2;
            double preference = triplet.Item3;
            if (preference > 0.5)
            {
                positionByItem[indexOfItem_i]++;
                positionByItem[indexOfItem_j]--;
            }
            else if (preference < 0.5)
            {
                positionByItem[indexOfItem_i]--;
                positionByItem[indexOfItem_j]++;
            }
        }

        // Each item takes part in 2 * (n - 1) ordered pairs, hence the normalization
        int normalizationTerm = unknownItemsOfUser.Count * 2 - 2;
        for (int i = 0; i < positionByItem.Length; i++)
        {
            if (positionByItem[i] != 0) { positionByItem[i] /= normalizationTerm; }
        }

        Vector<double> positionsOfUser = SparseVector.OfEnumerable(positionByItem);

        lock (lockMe)
        {
            positionMatrixCache[indexOfUser] = positionsOfUser;
            //positionMatrix.SetRow(indexOfUser, positionsOfUser);
            //PR_predicted[indexOfUser] = predictedPreferencesOfUser;
        }
    });

    return SparseMatrix.OfRowVectors(positionMatrixCache);
}
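// Worked example of the position normalization above (hypothetical numbers):
// with n = 4 unknown items, each item takes part in 2 * (4 - 1) = 6 ordered
// pairs (n - 1 as the first element, n - 1 as the second). An item predicted
// to win every comparison accumulates +6 and ends at position 6 / 6 = 1;
// an item that loses every comparison ends at -1, matching the [-1, 1]
// position range used elsewhere in this repo.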
private static void LearnLatentFeatures(PrefRelations PR_train, int maxEpoch,
    double learnRate, double regularizationOfUser, double regularizationOfItem,
    int factorCount, out List<Vector<double>> P, out List<Vector<double>> Q)
{
    //regularizationOfUser = 0;
    //regularizationOfItem = 0;

    int userCount = PR_train.UserCount;
    int itemCount = PR_train.ItemCount;

    // User latent vectors with default seed
    P = new List<Vector<double>>();
    Q = new List<Vector<double>>();
    ContinuousUniform uniformDistribution = new ContinuousUniform(0, 0.1, new Random(Config.Seed));
    //var p = Utils.CreateRandomMatrixFromUniform(userCount, factorCount, 0, 0.1, Config.Seed);
    for (int i = 0; i < userCount; i++)
    {
        P.Add(DenseVector.CreateRandom(factorCount, uniformDistribution));
    }
    for (int i = 0; i < itemCount; i++)
    {
        Q.Add(DenseVector.CreateRandom(factorCount, uniformDistribution));
    }
    //P = Utils.CreateRandomMatrixFromUniform(userCount, factorCount, 0, 0.1, Config.Seed);
    // Item latent vectors with a different seed
    //Q = Utils.CreateRandomMatrixFromUniform(factorCount, itemCount, 0, 0.1, Config.Seed + 1);

    // SGD
    double previousErrorSum = double.MaxValue;
    for (int epoch = 0; epoch < maxEpoch; ++epoch)
    {
        // For each epoch, we iterate through all preference relations of all users

        // Loop through each user
        foreach (var pair in PR_train.PreferenceRelationsByUser)
        {
            int indexOfUser = pair.Key;
            SparseMatrix preferenceRelationsOfUser = pair.Value;

            // For each preference relation of this user, update the latent feature vectors
            foreach (var entry in preferenceRelationsOfUser.EnumerateIndexed(Zeros.AllowSkip))
            {
                int indexOfItem_i = entry.Item1;
                int indexOfItem_j = entry.Item2;
                //Console.WriteLine(preferenceRelationsOfUser[indexOfItem_i, indexOfItem_j]);
                //Console.WriteLine(preferenceRelationsOfUser[indexOfItem_j, indexOfItem_i]);

                // The matrix is a mirror; process each pair only once
                if (indexOfItem_i >= indexOfItem_j) { continue; }

                // Convert the customized preference indicators from 1,2,3 into
                // 0,0.5,1 to match the scale of the predicted pi, which is in [0,1]
                double prefRelation_uij = 0;
                if (entry.Item3 == Config.Preferences.Preferred) { prefRelation_uij = 1.0; }
                else if (entry.Item3 == Config.Preferences.EquallyPreferred) { prefRelation_uij = 0.5; }
                else if (entry.Item3 == Config.Preferences.LessPreferred) { prefRelation_uij = 0.0; }
                else { Debug.Assert(false, "Should not be here."); }

                // TODO: It may be faster to do two dot products to remove the
                // subtraction (but we lose the sparse property, I think)
                double PQ_ui = P[indexOfUser].DotProduct(Q[indexOfItem_i]);
                double PQ_uj = P[indexOfUser].DotProduct(Q[indexOfItem_j]);
                double estimate_uij = PQ_ui - PQ_uj;
                //double estimate_uij = P.Row(indexOfUser).DotProduct(Q.Column(indexOfItem_i) - Q.Column(indexOfItem_j)); // Eq. 2
                double exp_estimate_uij = Math.Exp(estimate_uij); // numerator in Eq. 2
                double normalized_estimate_uij = SpecialFunctions.InverseLogit(estimate_uij); // pi_uij in paper
                //Debug.Assert(prefRelation_uij >= 0 && prefRelation_uij <= 1);
                //Debug.Assert(normalized_estimate_uij >= 0 && normalized_estimate_uij <= 1);

                // The error term in Eq. 6-9. Note that the author's paper
                // incorrectly puts a power on the error
                double e_uij = prefRelation_uij - normalized_estimate_uij;
                //double e_uij = Math.Pow(prefRelation_uij - normalized_estimate_uij, 2); // from Eq. 3 & 6
                double e_uij_derivative = (e_uij * normalized_estimate_uij) / (1 + exp_estimate_uij);

                // Update feature vectors
                Vector<double> P_u = P[indexOfUser];
                Vector<double> Q_i = Q[indexOfItem_i];
                Vector<double> Q_j = Q[indexOfItem_j];
                Vector<double> Q_ij = Q_i - Q_j;

                // Eq. 7, note that the author's paper incorrectly writes + regularization
                P[indexOfUser] += Q_ij.Multiply(e_uij_derivative * learnRate)
                    - P_u.Multiply(regularizationOfUser * learnRate);
                //Vector<double> P_u_updated = P_u + (Q_ij.Multiply(e_uij_derivative) - P_u.Multiply(regularizationOfUser)).Multiply(learnRate);
                //P[indexOfUser] = P_u_updated;

                Vector<double> P_u_derivative = P_u.Multiply(e_uij_derivative * learnRate);

                // Eq. 8, note that the author's paper incorrectly writes + regularization
                Q[indexOfItem_i] += (P_u_derivative - Q_i.Multiply(regularizationOfItem * learnRate));
                //Vector<double> Q_i_updated = Q_i + (P_u_derivative - Q_i.Multiply(regularizationOfItem * learnRate));
                //Q[indexOfItem_i] = Q_i_updated;

                // Eq. 9, note that the author's paper incorrectly writes + regularization
                Q[indexOfItem_j] -= (P_u_derivative - Q_j.Multiply(regularizationOfItem * learnRate));
                //Vector<double> Q_j_updated = Q_j - (P_u_derivative - Q_j.Multiply(regularizationOfItem * learnRate));
                //Q[indexOfItem_j] = Q_j_updated;

                double estimate_uij_updated = P[indexOfUser].DotProduct(Q[indexOfItem_i] - Q[indexOfItem_j]); // Eq. 2
                double exp_estimate_uij_updated = Math.Exp(estimate_uij_updated); // numerator in Eq. 2
                double normalized_estimate_uij_updated = SpecialFunctions.InverseLogit(estimate_uij_updated); // pi_uij in paper
                double e_uij_updated = prefRelation_uij - normalized_estimate_uij_updated; // from Eq. 3 & 6
                //double e_uij_updated = Math.Pow(prefRelation_uij - normalized_estimate_uij_updated, 2); // from Eq. 3 & 6

                //double debug1 = Math.Abs(e_uij) - Math.Abs(e_uij_updated);
                //Debug.Assert(debug1 > 0); // After the update the error should be smaller

                #region Loop version of gradient update
                /*
                for (int k = 0; k < factorCount; ++k)
                {
                    double factorOfUser = P[indexOfUser, k];
                    double factorOfItem_i = Q[k, indexOfItem_i];
                    double factorOfItem_j = Q[k, indexOfItem_j];

                    // TODO: Separate user/item regularization coefficients
                    P[indexOfUser, k] += learnRate * (e_uij * normalized_estimate_uij * factorOfUser - regularization * factorOfUser);
                    // The two items are updated in different directions
                    Q[k, indexOfItem_i] += learnRate * (normalized_estimate_uij * factorOfItem_i - regularization * factorOfItem_i);
                    Q[k, indexOfItem_j] -= learnRate * (normalized_estimate_uij * factorOfItem_j - regularization * factorOfItem_j);
                }
                */
                #endregion
            }
        }

        // Display the current regularized error to see if it converges
        double currentErrorSum = 0;
        //if (epoch == 0 || epoch == maxEpoch - 1 || epoch % (int)Math.Ceiling(maxEpoch * 0.1) == 4)
        if (true)
        {
            double eSum = 0;
            foreach (var pair in PR_train.PreferenceRelationsByUser)
            {
                int indexOfUser = pair.Key;
                SparseMatrix preferenceRelationsOfUser = pair.Value;

                foreach (var entry in preferenceRelationsOfUser.EnumerateIndexed(Zeros.AllowSkip))
                {
                    int indexOfItem_i = entry.Item1;
                    int indexOfItem_j = entry.Item2;
                    if (indexOfItem_i >= indexOfItem_j) { continue; }

                    double prefRelation_uij = 0;
                    if (entry.Item3 == Config.Preferences.Preferred) { prefRelation_uij = 1.0; }
                    else if (entry.Item3 == Config.Preferences.EquallyPreferred) { prefRelation_uij = 0.5; }
                    else if (entry.Item3 == Config.Preferences.LessPreferred) { prefRelation_uij = 0.0; }
                    else { Debug.Assert(false, "Should not be here."); }

                    double estimate_uij = P[indexOfUser].DotProduct(Q[indexOfItem_i] - Q[indexOfItem_j]); // Eq. 2
                    double normalized_estimate_uij = SpecialFunctions.InverseLogit(estimate_uij); // Eq. 2

                    // Sum the squared error of this preference relation
                    eSum += Math.Pow(prefRelation_uij - normalized_estimate_uij, 2);

                    // Sum the regularization term
                    //for (int k = 0; k < factorCount; ++k)
                    //{
                    //    eSum += (regularizationOfUser * 0.5) * (Math.Pow(P[indexOfUser, k], 2)
                    //        + Math.Pow(Q[k, indexOfItem_i], 2) + Math.Pow(Q[k, indexOfItem_j], 2));
                    //}
                }
            }

            double regularizationPenalty = regularizationOfUser * P.Sum(x => x.SquaredSum());
            regularizationPenalty += regularizationOfItem * Q.Sum(x => x.SquaredSum());
            eSum += regularizationPenalty;

            // Record the current error
            currentErrorSum = eSum;

            Utils.PrintEpoch("Epoch", epoch, maxEpoch, "Learning error", eSum.ToString("0.0"), true);

            // Stop learning when the improvement of the regularized error falls
            // below a threshold. (With the commented-out condition above, this
            // would only be checked once every several epochs.)
            if (previousErrorSum - currentErrorSum < 0.0001)
            {
                Console.WriteLine("Improvement less than 0.0001, learning stopped.");
                break;
            }
            previousErrorSum = currentErrorSum;
        }
    }
}
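// Reading aid (reconstructed from the code and its "Eq." comments above,
// not quoted from the paper):
//
//   pi_uij = sigma(P_u . (Q_i - Q_j)),   sigma(x) = 1 / (1 + e^-x)        (Eq. 2)
//   e_uij  = pref_uij - pi_uij,          pref_uij in {0, 0.5, 1}
//
// The e_uij_derivative expression uses the identity 1 - sigma(x) = 1 / (1 + e^x):
//
//   e_uij * sigma(x) / (1 + e^x) = e_uij * sigma(x) * (1 - sigma(x)) = e_uij * sigma'(x)
//
// i.e. the error scaled by the slope of the logistic function at the current
// estimate, which is the standard gradient for a logistic output unit.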
public static PrefRelations PredictPrefRelations(PrefRelations PR_train, SparseMatrix PR_unknown,
    int maxEpoch, double learnRate, double regularizationOfUser,
    double regularizationOfItem, int factorCount)
{
    // Latent features
    List<Vector<double>> P;
    List<Vector<double>> Q;
    //Matrix<double> P;
    //Matrix<double> Q;

    LearnLatentFeatures(PR_train, maxEpoch, learnRate, regularizationOfUser,
        regularizationOfItem, factorCount, out P, out Q);

    PrefRelations PR_predicted = new PrefRelations(PR_train.ItemCount);

    Object lockMe = new Object();
    Parallel.ForEach(PR_unknown.EnumerateRowsIndexed(), user =>
    {
        int indexOfUser = user.Item1;
        Vector<double> unknownPreferencesOfUser = user.Item2;
        SparseMatrix predictedPreferencesOfUser = new SparseMatrix(PR_train.ItemCount, PR_train.ItemCount);

        // Predict each unknown preference
        foreach (var unknownPreference in unknownPreferencesOfUser.EnumerateIndexed(Zeros.AllowSkip))
        {
            int indexOfItem_i = unknownPreference.Item1;
            int indexOfItem_j = (int)unknownPreference.Item2;
            double estimate_uij = P[indexOfUser].DotProduct(Q[indexOfItem_i] - Q[indexOfItem_j]); // Eq. 2
            double normalized_estimate_uij = Core.SpecialFunctions.InverseLogit(estimate_uij); // pi_uij in paper
            predictedPreferencesOfUser[indexOfItem_i, indexOfItem_j] = normalized_estimate_uij;
        }

        lock (lockMe)
        {
            PR_predicted[indexOfUser] = predictedPreferencesOfUser;
        }
    });

    return PR_predicted;
}
private static double cosinePR(PrefRelations PR, int u_a, int u_b)
{
    SparseMatrix pr_a = PR[u_a];
    SparseMatrix pr_b = PR[u_b];

    //Debug.Assert(pr_a.Trace() == SparseMatrix.Zero, "The diagonal of a user preference relation matrix should be left empty.");
    //Debug.Assert(pr_b.Trace() == SparseMatrix.Zero, "The diagonal of a user preference relation matrix should be left empty.");

    // The number of preference relations agreed between users a and b
    int agreedCount = pr_a.Fold2((count, prefOfA, prefOfB) =>
        count + (prefOfA == prefOfB ? 1 : 0), 0, pr_b, Zeros.AllowSkip);

    #region Obsolete naive implementation
    /*
    // TODO: there should be a faster lambda way of doing this
    // Loop through all non-zero elements
    foreach (Tuple<int, int, double> element in pr_a.EnumerateIndexed(Zeros.AllowSkip))
    {
        int item_i = element.Item1;
        int item_j = element.Item2;
        double preference_a = element.Item3;
        // Because pr_ij is just the reverse of pr_ji,
        // we count only i-j to avoid double counting
        // and also reduce the number of calls to pr_b[]
        if (item_i > item_j)
        {
            if (preference_a == pr_b[item_i, item_j])
            {
                ++agreedCount;
            }
        }
    }
    */
    #endregion

    // The multiplication result can be too large and cause overflow,
    // therefore we take Sqrt() first and then multiply
    double normalization = checked(Math.Sqrt((double)pr_a.NonZerosCount)
        * Math.Sqrt((double)pr_b.NonZerosCount));

    // Very small value
    return agreedCount / normalization;
}
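// Worked example (hypothetical counts): if users a and b agree on 40
// preference relations, with 100 and 64 stored (non-zero) entries
// respectively, then
//   cosinePR(a, b) = 40 / (sqrt(100) * sqrt(64)) = 40 / 80 = 0.5
// so the similarity is the agreement count normalized by the geometric
// mean of the two profile sizes, analogous to cosine similarity.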