/// <summary>
/// Verifies that the position matrix derived from discrete preference relations
/// holds a non-zero position for every rating in the sample rating matrix.
/// </summary>
public void GetPositionMatrix()
{
    /*
     * Sample rating matrix (row = user, column = item):
     * 5  3  0  1
     * 4  0  0  1
     * 1  1  0  5
     * 1  0  0  4
     * 0  1  5  4
     */
    DataMatrix ratings = GetSampleRatingMatrix();
    PrefRelations relations = PrefRelations.CreateDiscrete(ratings);

    // act
    SparseMatrix positions = relations.GetPositionMatrix();

    // assert
    var sparseRatings = ratings.AsSparseMatrix;

    // There must be exactly as many positions as there are ratings.
    Debug.Assert(positions.NonZerosCount == sparseRatings.NonZerosCount);

    // Every rating must have a matching position. Because the counts were
    // already compared above, the opposite direction needs no separate check.
    foreach (Tuple<int, int, double> entry in sparseRatings.EnumerateIndexed(Zeros.AllowSkip))
    {
        int userIndex = entry.Item1;
        int itemIndex = entry.Item2;
        Debug.Assert(positions[userIndex, itemIndex] != SparseMatrix.Zero);
    }
}
/// <summary>
/// Computes similarities from preference relations with the
/// <c>CosinePrefRelations</c> metric and returns only the neighbor data;
/// the strong-similarity indicators produced alongside are discarded.
/// </summary>
/// <param name="PR">Preference relations to compare.</param>
/// <param name="maxCountOfNeighbors">Forwarded unchanged to <c>ComputeSimilarities</c>.</param>
/// <param name="strongSimilarityThreshold">Forwarded to <c>ComputeSimilarities</c> as its similarity threshold.</param>
/// <param name="neighborsByObject">Receives the computed similarity data.</param>
public static void GetCosineOfPrefRelations(
    PrefRelations PR,
    int maxCountOfNeighbors,
    double strongSimilarityThreshold,
    out SimilarityData neighborsByObject)
{
    // Required by the callee's signature, but callers of this overload do not need it.
    HashSet<Tuple<int, int>> discardedStrongIndicators;

    ComputeSimilarities(
        PR,
        SimilarityMetric.CosinePrefRelations,
        maxCountOfNeighbors,
        strongSimilarityThreshold,
        out neighborsByObject,
        out discardedStrongIndicators);
}
/// <summary>
/// Verifies <c>PrefRelations.CreateDiscrete</c> against the sample rating matrix:
/// empty diagonals, the expected number of relations per user, and spot checks
/// of the first and last users' pairwise preferences.
/// </summary>
public void CreateDiscrete()
{
    /*
     * Sample rating matrix (row = user, column = item):
     * 5  3  0  1
     * 4  0  0  1
     * 1  1  0  5
     * 1  0  0  4
     * 0  1  5  4
     */
    DataMatrix ratings = GetSampleRatingMatrix();

    // act
    PrefRelations relations = PrefRelations.CreateDiscrete(ratings);

    // assert
    foreach (KeyValuePair<int, SparseMatrix> entry in relations.PreferenceRelationsByUser)
    {
        SparseMatrix userPreferences = entry.Value;
        var ratedItemCount = ratings.GetNonZerosCountOfRow(entry.Key);

        // The diagonal (an item compared with itself) is left empty.
        Debug.Assert(userPreferences.Trace() == 0);

        // n rated items must yield n^2 - n preference relations.
        Debug.Assert((Math.Pow(ratedItemCount, 2) - ratedItemCount) == userPreferences.NonZerosCount);
    }

    // First user (ratings 5 3 0 1)
    SparseMatrix firstUser = relations[0];
    Debug.WriteLine("PR[0][0, 0]=" + firstUser[0, 0]);
    Debug.Assert(firstUser[0, 0] == SparseMatrix.Zero);
    Debug.Assert(firstUser[0, 1] == Config.Preferences.Preferred);
    Debug.Assert(firstUser[1, 0] == Config.Preferences.LessPreferred);
    Debug.Assert(firstUser[0, 2] == SparseMatrix.Zero);
    Debug.Assert(firstUser[2, 0] == SparseMatrix.Zero);
    Debug.Assert(firstUser[1, 2] == SparseMatrix.Zero);
    Debug.Assert(firstUser[2, 1] == SparseMatrix.Zero);
    Debug.Assert(firstUser[1, 3] == Config.Preferences.Preferred);
    Debug.Assert(firstUser[3, 1] == Config.Preferences.LessPreferred);

    // Last user (ratings 0 1 5 4)
    SparseMatrix lastUser = relations[4];
    Debug.Assert(lastUser[1, 1] == SparseMatrix.Zero);
    Debug.Assert(lastUser[0, 1] == SparseMatrix.Zero);
    Debug.Assert(lastUser[1, 0] == SparseMatrix.Zero);
    Debug.Assert(lastUser[0, 2] == SparseMatrix.Zero);
    Debug.Assert(lastUser[2, 0] == SparseMatrix.Zero);
    Debug.Assert(lastUser[1, 2] == Config.Preferences.LessPreferred);
    Debug.Assert(lastUser[2, 1] == Config.Preferences.Preferred);
    Debug.Assert(lastUser[1, 3] == Config.Preferences.LessPreferred);
    Debug.Assert(lastUser[3, 1] == Config.Preferences.Preferred);
}
/// <summary>
/// Verifies <c>PrefRelations.PreferencesToPositions</c> for the first, third and
/// last users of the sample rating matrix: the expected position per item, and
/// one position per rating.
/// </summary>
public void PreferencesToPositions()
{
    /*
     * Sample rating matrix (row = user, column = item):
     * 5  3  0  1
     * 4  0  0  1
     * 1  1  0  5
     * 1  0  0  4
     * 0  1  5  4
     */
    DataMatrix ratings = GetSampleRatingMatrix();
    PrefRelations relations = PrefRelations.CreateDiscrete(ratings);

    // act
    Vector<double> first = relations.PreferencesToPositions(relations[0]);
    Vector<double> third = relations.PreferencesToPositions(relations[2]);
    Vector<double> last = relations.PreferencesToPositions(relations[4]);

    // assert
    // First user (ratings 5 3 0 1)
    Debug.Assert(first[0] == 1 + Config.Preferences.PositionShift);
    Debug.Assert(first[1] == 0 + Config.Preferences.PositionShift);
    Debug.Assert(first[2] == SparseMatrix.Zero);
    Debug.Assert(first[3] == -1 + Config.Preferences.PositionShift);

    // Third user (ratings 1 1 0 5): the two tied items share the same position
    Debug.Assert(third[0] == -0.5 + Config.Preferences.PositionShift);
    Debug.Assert(third[1] == -0.5 + Config.Preferences.PositionShift);
    Debug.Assert(third[2] == SparseMatrix.Zero);
    Debug.Assert(third[3] == 1 + Config.Preferences.PositionShift);

    // Last user (ratings 0 1 5 4)
    Debug.Assert(last[0] == SparseMatrix.Zero);
    Debug.Assert(last[1] == -1 + Config.Preferences.PositionShift);
    Debug.Assert(last[2] == 1 + Config.Preferences.PositionShift);
    Debug.Assert(last[3] == 0 + Config.Preferences.PositionShift);

    // Each user must end up with exactly as many positions as ratings.
    Debug.Assert(
        first.GetNonZerosCount() == ratings.GetNonZerosCountOfRow(0),
        String.Format("{0}=={1}", first.GetNonZerosCount(), ratings.GetNonZerosCountOfRow(0)));
    Debug.Assert(
        third.GetNonZerosCount() == ratings.GetNonZerosCountOfRow(2),
        String.Format("{0}=={1}", third.GetNonZerosCount(), ratings.GetNonZerosCountOfRow(2)));
    Debug.Assert(
        last.GetNonZerosCount() == ratings.GetNonZerosCountOfRow(4),
        String.Format("{0}=={1}", last.GetNonZerosCount(), ratings.GetNonZerosCountOfRow(4)));
}
/// <summary>
/// Computes pairwise user similarities over preference relations with the requested metric.
/// </summary>
/// <param name="PR">Preference relations; <c>PR.UserCount</c> determines how many users are compared.</param>
/// <param name="similarityMetric">
/// Metric to apply. Only <c>CosinePrefRelations</c> is handled here; any other
/// value yields no similarity entries.
/// </param>
/// <param name="maxCountOfNeighbors">Passed to the <c>SimilarityData</c> constructor.</param>
/// <param name="minSimilarityThreshold">
/// A pair is recorded as strongly similar when its similarity is strictly greater than this value.
/// </param>
/// <param name="neighborsByObject">
/// Receives the similarity data; every computed pair is registered in both directions.
/// </param>
/// <param name="strongSimilarityIndicators">
/// Receives the (i, j) pairs with i &gt; j whose similarity exceeds the threshold.
/// </param>
private static void ComputeSimilarities(
    PrefRelations PR,
    Metric.SimilarityMetric similarityMetric,
    int maxCountOfNeighbors,
    double minSimilarityThreshold,
    out SimilarityData neighborsByObject,
    out HashSet<Tuple<int, int>> strongSimilarityIndicators)
{
    int userCount = PR.UserCount;

    // out parameters cannot be captured by the lambda below, so results are
    // accumulated in locals and handed over at the end.
    HashSet<Tuple<int, int>> strongPairs = new HashSet<Tuple<int, int>>();
    SimilarityData neighbors = new SimilarityData(maxCountOfNeighbors);

    // Guards both result collections, which are shared by all parallel iterations.
    object gate = new object();

    Parallel.For(0, userCount, row =>
    {
        Utils.PrintEpoch("Progress current/total", row, userCount);

        // Only the strictly lower triangle (col < row) is computed: self
        // similarity is skipped and each pair is mirrored when it is stored.
        for (int col = 0; col < row; col++)
        {
            switch (similarityMetric)
            {
                case SimilarityMetric.CosinePrefRelations:
                    double similarity = Metric.cosinePR(PR, row, col);
                    lock (gate)
                    {
                        if (similarity > minSimilarityThreshold)
                        {
                            strongPairs.Add(new Tuple<int, int>(row, col));
                        }

                        neighbors.AddSimilarityData(row, col, similarity);
                        neighbors.AddSimilarityData(col, row, similarity);
                    }
                    break;

                // More metrics to be added here.
            }
        }
    });

    neighborsByObject = neighbors;
    strongSimilarityIndicators = strongPairs;
}
/// <summary>
/// Cosine-style similarity between the preference relations of two users:
/// the number of matrix positions on which both users hold the same preference
/// value, divided by the square root of the product of their stored
/// (non-zero) preference counts.
/// </summary>
/// <param name="PR">Preference relations indexed by user.</param>
/// <param name="u_a">Index of the first user.</param>
/// <param name="u_b">Index of the second user.</param>
/// <returns>
/// The similarity value, or 0 when either user has no stored preference
/// relations (the ratio would otherwise be 0/0, i.e. NaN).
/// </returns>
private static double cosinePR(PrefRelations PR, int u_a, int u_b)
{
    SparseMatrix pr_a = PR[u_a];
    SparseMatrix pr_b = PR[u_b];

    // Number of positions on which users a and b agree.
    // Zeros.AllowSkip presumably lets the fold skip positions that are empty in
    // both matrices (Math.NET-style API) - verify against the library in use.
    int agreedCount = pr_a.Fold2(
        (count, prefOfA, prefOfB) => count + (prefOfA == prefOfB ? 1 : 0),
        0,
        pr_b,
        Zeros.AllowSkip);

    int countOfA = pr_a.NonZerosCount;
    int countOfB = pr_b.NonZerosCount;

    // A user without any preference relations cannot agree with anyone.
    // Returning 0 keeps NaN out of the similarity data.
    if (countOfA == 0 || countOfB == 0)
    {
        return 0;
    }

    // The product of the two counts can be too large for an int, so the square
    // roots are taken first and multiplied as doubles. (A checked context only
    // affects integral arithmetic, so none is needed here.)
    double normalization = Math.Sqrt((double)countOfA) * Math.Sqrt((double)countOfB);

    return agreedCount / normalization;
}
/// <summary>
/// Prepares the data needed by ordinal (preference-relation based) methods:
/// converts <c>R_train</c> into <c>PR_train</c>, then loads the preference-based
/// user/item similarity data from disk or computes it when any of the data
/// files is missing.
/// </summary>
/// <param name="saveLoadedData">
/// When true, freshly computed similarity data is written to the "USP", "ISP"
/// and "SSIIP" data files. Has no effect when the data was loaded from disk.
/// </param>
/// <returns>
/// A log of the preparation steps, or "Is ready." when the ordinal data had
/// already been prepared.
/// </returns>
public string GetReadyForOrdinal(bool saveLoadedData = true)
{
    // Ordinal preparation reads R_train, which the numerical stage provides.
    if (!ReadyForNumerical)
    {
        GetReadyForNumerical();
    }

    if (ReadyForOrdinal)
    {
        return "Is ready.";
    }

    StringBuilder log = new StringBuilder();

    Utils.StartTimer();
    // NOTE(review): the heading text contains a typo ("preferecen"); it is
    // runtime output and is deliberately left unchanged here.
    log.AppendLine(Utils.PrintHeading("Prepare preferecen relation data"));
    Console.WriteLine("Converting R_train into PR_train");
    log.AppendLine("Converting R_train into PR_train");
    PR_train = PrefRelations.CreateDiscrete(R_train);
    // PR_test is not built here; the conversion of R_test is disabled.
    log.AppendLine(Utils.StopTimer());

    #region Prepare similarity data
    string userSimilaritiesFile = GetDataFileName("USP");
    string itemSimilaritiesFile = GetDataFileName("ISP");
    string strongIndicatorsFile = GetDataFileName("SSIIP");

    bool cachedDataAvailable = File.Exists(userSimilaritiesFile)
        && File.Exists(itemSimilaritiesFile)
        && File.Exists(strongIndicatorsFile);

    if (cachedDataAvailable)
    {
        Utils.StartTimer();
        log.AppendLine(Utils.PrintHeading("Load user, item, indicators variables (Pref based)"));
        UserSimilaritiesOfPref = Utils.IO<SimilarityData>.LoadObject(userSimilaritiesFile);
        ItemSimilaritiesOfPref = Utils.IO<SimilarityData>.LoadObject(itemSimilaritiesFile);
        StrongSimilarityIndicatorsByItemPref = Utils.IO<HashSet<Tuple<int, int>>>.LoadObject(strongIndicatorsFile);
        log.AppendLine(Utils.StopTimer());
    }
    else
    {
        Utils.StartTimer();
        log.AppendLine(Utils.PrintHeading("Compute user-user similarities (Pref based)"));
        Metric.GetCosineOfPrefRelations(PR_train, MaxCountOfNeighbors, StrongSimilarityThreshold, out UserSimilaritiesOfPref);
        log.AppendLine(Utils.StopTimer());

        // For the moment, item-item similarities are computed from the
        // user-wise preference positions; this is not the same as the
        // user-user preference similarities above.
        Utils.StartTimer();
        log.AppendLine(Utils.PrintHeading("Compute item-item similarities (Pref based)"));
        DataMatrix PR_userwise_preferences = new DataMatrix(PR_train.GetPositionMatrix());
        Metric.GetPearsonOfColumns(PR_userwise_preferences, MaxCountOfNeighbors, StrongSimilarityThreshold,
            out ItemSimilaritiesOfPref, out StrongSimilarityIndicatorsByItemPref);
        log.AppendLine(Utils.StopTimer());

        if (saveLoadedData)
        {
            Utils.IO<SimilarityData>.SaveObject(UserSimilaritiesOfPref, userSimilaritiesFile);
            Utils.IO<SimilarityData>.SaveObject(ItemSimilaritiesOfPref, itemSimilaritiesFile);
            Utils.IO<HashSet<Tuple<int, int>>>.SaveObject(StrongSimilarityIndicatorsByItemPref, strongIndicatorsFile);
        }

        // Every StartTimer() above is already paired with a StopTimer(), so no
        // further StopTimer() call is made here.
    }
    #endregion

    ReadyForOrdinal = true;
    return log.ToString();
}