public static void GetCosineOfPrefRelations(PrefRelations PR, int maxCountOfNeighbors,
    double strongSimilarityThreshold, out SimilarityData neighborsByObject)
{
    HashSet<Tuple<int, int>> unusedStrongSimilarityIndicators;
    ComputeSimilarities(PR, SimilarityMetric.CosinePrefRelations, maxCountOfNeighbors,
        strongSimilarityThreshold, out neighborsByObject, out unusedStrongSimilarityIndicators);
}
public static void GetPearsonOfColumns(DataMatrix R, int maxCountOfNeighbors,
    double strongSimilarityThreshold, out SimilarityData neighborsByObject,
    out HashSet<Tuple<int, int>> strongSimilarityIndicators)
{
    ComputeSimilarities(R.Matrix.Transpose(), SimilarityMetric.PearsonRating,
        maxCountOfNeighbors, strongSimilarityThreshold,
        out neighborsByObject, out strongSimilarityIndicators);

    // Debug: cross-check the indicators against a direct Pearson computation
    // on the first (up to) 100 items
    for (int i = 0; i < R.ItemCount && i < 100; i++)
    {
        for (int j = 0; j < R.ItemCount && j < 100; j++)
        {
            if (i == j) { continue; }
            double corr_ij = Correlation.Pearson((SparseVector)R.Matrix.Column(i),
                (SparseVector)R.Matrix.Column(j));
            if (corr_ij > strongSimilarityThreshold)
            {
                Debug.Assert(strongSimilarityIndicators.Contains(new Tuple<int, int>(i, j)));
                Debug.Assert(strongSimilarityIndicators.Contains(new Tuple<int, int>(j, i)));
            }
        }
    }
}
public static void GetCosineOfRows(DataMatrix R, int maxCountOfNeighbors,
    double strongSimilarityThreshold, out SimilarityData neighborsByObject)
{
    HashSet<Tuple<int, int>> unusedStrongSimilarityIndicators;
    ComputeSimilarities(R.Matrix, SimilarityMetric.CosineRating, maxCountOfNeighbors,
        strongSimilarityThreshold, out neighborsByObject, out unusedStrongSimilarityIndicators);
}
public static void GetCosineOfColumns(DataMatrix R, int maxCountOfNeighbors,
    double strongSimilarityThreshold, out SimilarityData neighborsByObject,
    out HashSet<Tuple<int, int>> strongSimilarityIndicators)
{
    // Just rotate the matrix so that columns become rows
    ComputeSimilarities(R.Matrix.Transpose(), SimilarityMetric.CosineRating,
        maxCountOfNeighbors, strongSimilarityThreshold,
        out neighborsByObject, out strongSimilarityIndicators);
}
/// <summary>
/// Switch between different similarity metrics for preference relations.
/// </summary>
/// <param name="PR"></param>
/// <param name="similarityMetric"></param>
/// <param name="maxCountOfNeighbors"></param>
/// <param name="minSimilarityThreshold"></param>
/// <param name="neighborsByObject"></param>
/// <param name="strongSimilarityIndicators"></param>
private static void ComputeSimilarities(PrefRelations PR,
    Metric.SimilarityMetric similarityMetric, int maxCountOfNeighbors,
    double minSimilarityThreshold, out SimilarityData neighborsByObject,
    out HashSet<Tuple<int, int>> strongSimilarityIndicators)
{
    int dimension = PR.UserCount;
    HashSet<Tuple<int, int>> strongSimilarityIndicators_out = new HashSet<Tuple<int, int>>();
    SimilarityData neighborsByObject_out = new SimilarityData(maxCountOfNeighbors);

    // Compute similarity for the lower triangular only
    Object lockMe = new Object();
    Parallel.For(0, dimension, i =>
    {
        Utils.PrintEpoch("Progress current/total", i, dimension);

        for (int j = 0; j < dimension; j++)
        {
            if (i == j) { continue; }    // Skip self similarity
            else if (i > j)
            {
                switch (similarityMetric)
                {
                    case SimilarityMetric.CosinePrefRelations:
                        double cosinePR = Metric.cosinePR(PR, i, j);
                        lock (lockMe)
                        {
                            if (cosinePR > minSimilarityThreshold)
                            {
                                strongSimilarityIndicators_out.Add(new Tuple<int, int>(i, j));
                            }
                            neighborsByObject_out.AddSimilarityData(i, j, cosinePR);
                            neighborsByObject_out.AddSimilarityData(j, i, cosinePR);
                        }
                        break;
                    // More metrics to be added here.
                }
            }
        }
    });

    neighborsByObject = neighborsByObject_out;
    strongSimilarityIndicators = strongSimilarityIndicators_out;
}
/// <summary>
/// The user-based KNN collaborative filtering described in the paper:
/// Resnick, P., et al., "GroupLens: an open architecture for collaborative filtering of netnews", 1994.
/// Link: http://dx.doi.org/10.1145/192844.192905
/// </summary>
/// <param name="R_train"></param>
/// <param name="R_unknown"></param>
/// <param name="neighborsByUser"></param>
/// <param name="K"></param>
/// <returns></returns>
public static DataMatrix PredictRatings(DataMatrix R_train, DataMatrix R_unknown,
    SimilarityData neighborsByUser, int K)
{
    // Debug
    Debug.Assert(R_train.UserCount == R_unknown.UserCount);
    Debug.Assert(R_train.ItemCount == R_unknown.ItemCount);
    int cappedCount = 0, globalMeanCount = 0;

    // This matrix stores predictions
    DataMatrix R_predicted = new DataMatrix(R_unknown.UserCount, R_unknown.ItemCount);

    // Basic statistics from the train set
    double globalMean = R_train.GetGlobalMean();
    Vector<double> meanByUser = R_train.GetUserMeans();
    Vector<double> meanByItem = R_train.GetItemMeans();

    // Predict ratings for each test user
    // Single thread appears to be very fast, parallel.foreach is unnecessary
    Object lockMe = new Object();
    Parallel.ForEach(R_unknown.Users, user =>
    {
        int indexOfUser = user.Item1;
        RatingVector userRatings = new RatingVector(R_train.GetRow(indexOfUser));
        RatingVector unknownRatings = new RatingVector(user.Item2);

        Utils.PrintEpoch("Predicting user/total", indexOfUser, R_train.UserCount);

        // Note that there are more than K neighbors in the list (sorted by similarity);
        // we will use the top-K neighbors WHO HAVE RATED THE ITEM.
        // For example, we may keep 200 top neighbors and hope that K of them
        // have rated the item. We can't keep everyone in the neighbor list
        // because there are too many for large data sets.
        var topNeighborsOfUser = neighborsByUser[indexOfUser];
        //Dictionary<int, double> topKNeighbors = KNNCore.GetTopKNeighborsByUser(userSimilarities, indexOfUser, K);

        double meanOfUser = meanByUser[indexOfUser];

        // Loop through each rating to be predicted
        foreach (Tuple<int, double> unknownRating in unknownRatings.Ratings)
        {
            int itemIndex = unknownRating.Item1;
            double prediction;

            // TODO: we actually should use the top-K neighbors
            // that have rated this item, otherwise we may have
            // only a few neighbors who rated this item

            // Compute the average rating on this item given by the top K neighbors.
            // Each rating is offset by the neighbor's average and weighted by the similarity.
            double weightedSum = 0;
            double weightSum = 0;
            int currentTopKCount = 0;
            foreach (KeyValuePair<int, double> neighbor in topNeighborsOfUser)
            {
                int neighborIndex = neighbor.Key;
                double similarityOfNeighbor = neighbor.Value;
                double itemRatingOfNeighbor = R_train[neighborIndex, itemIndex];

                // We count only if the neighbor has seen this item before
                if (itemRatingOfNeighbor != 0)
                {
                    weightSum += similarityOfNeighbor;
                    weightedSum += (itemRatingOfNeighbor - meanByUser[neighborIndex]) * similarityOfNeighbor;
                    currentTopKCount++;
                    if (currentTopKCount >= K) { break; }    // Stop when we have seen K neighbors
                }
            }

            // A zero weightedSum means this is a cold item and the global mean is assigned by default
            if (weightedSum != 0)
            {
                prediction = meanOfUser + weightedSum / weightSum;
            }
            else
            {
                prediction = globalMean;
                Interlocked.Increment(ref globalMeanCount);    // counter is shared across parallel iterations
            }

            // Cap the ratings
            if (prediction > Config.Ratings.MaxRating)
            {
                Interlocked.Increment(ref cappedCount);
                prediction = Config.Ratings.MaxRating;
            }
            if (prediction < Config.Ratings.MinRating)
            {
                Interlocked.Increment(ref cappedCount);
                prediction = Config.Ratings.MinRating;
            }

            lock (lockMe)
            {
                R_predicted[indexOfUser, itemIndex] = prediction;
            }
        }
    });

    Utils.PrintValue("# capped predictions", cappedCount.ToString("D"));
    Utils.PrintValue("# default predictions", globalMeanCount.ToString("D"));
    return R_predicted;
}
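// Usage sketch (illustrative only, not part of the original code): it shows how the similarity
// helpers and the rating-based PredictRatings above fit together. The wrapper name
// ExampleUserKnnPrediction, the neighbor-list size 200, the threshold 0.3, and K = 50 are
// assumptions; Metric.GetCosineOfRows and PredictRatings are the methods defined in this file,
// and the sketch assumes it lives in the same class as that PredictRatings overload.
public static DataMatrix ExampleUserKnnPrediction(DataMatrix R_train, DataMatrix R_unknown)
{
    // Keep up to 200 candidate neighbors per user; 0.3 is the strong-similarity threshold
    // (this overload discards the indicator set)
    SimilarityData userSimilarities;
    Metric.GetCosineOfRows(R_train, 200, 0.3, out userSimilarities);

    // Predict each unknown rating from the top-K (here 50) most similar users who rated the item
    return PredictRatings(R_train, R_unknown, userSimilarities, 50);
}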
private static void ComputeSimilarities(Matrix<double> R,
    Metric.SimilarityMetric similarityMetric, int maxCountOfNeighbors,
    double minSimilarityThreshold, out SimilarityData neighborsByObject,
    out HashSet<Tuple<int, int>> strongSimilarityIndicators)
{
    int dimension = R.RowCount;
    List<Vector<double>> rows = new List<Vector<double>>(R.EnumerateRows());

    // I assume that the rows are enumerated from first to last
    Debug.Assert(rows[0].Sum() == R.Row(0).Sum());
    Debug.Assert(rows[rows.Count - 1].Sum() == R.Row(rows.Count - 1).Sum());

    List<Tuple<int, int>> strongSimilarityIndicators_out = new List<Tuple<int, int>>();
    SimilarityData neighborsByObject_out = new SimilarityData(maxCountOfNeighbors);

    Object lockMe = new Object();
    Parallel.For(0, dimension, indexOfRow =>
    {
        Utils.PrintEpoch("Progress current/total", indexOfRow, dimension);

        // Cache results per thread and merge them under the lock at the end
        Dictionary<Tuple<int, int>, double> similarityCache = new Dictionary<Tuple<int, int>, double>();
        List<Tuple<int, int>> strongSimilarityIndicatorCache = new List<Tuple<int, int>>();

        for (int indexOfNeighbor = 0; indexOfNeighbor < dimension; indexOfNeighbor++)
        {
            if (indexOfRow == indexOfNeighbor) { continue; }    // Skip self similarity
            else if (indexOfRow > indexOfNeighbor)              // Only the lower triangular is computed
            {
                switch (similarityMetric)
                {
                    case Metric.SimilarityMetric.CosineRating:
                        // TODO: make a note that it really matters to make it sparse, it computes differently!
                        double cosine = Metric.CosineR((SparseVector)rows[indexOfRow],
                            (SparseVector)rows[indexOfNeighbor]);
                        if (cosine > minSimilarityThreshold)
                        {
                            strongSimilarityIndicatorCache.Add(new Tuple<int, int>(indexOfRow, indexOfNeighbor));
                            strongSimilarityIndicatorCache.Add(new Tuple<int, int>(indexOfNeighbor, indexOfRow));
                        }
                        similarityCache[new Tuple<int, int>(indexOfRow, indexOfNeighbor)] = cosine;
                        similarityCache[new Tuple<int, int>(indexOfNeighbor, indexOfRow)] = cosine;
                        break;
                    case Metric.SimilarityMetric.PearsonRating:
                        double pearson = Metric.PearsonR((SparseVector)rows[indexOfRow],
                            (SparseVector)rows[indexOfNeighbor]);
                        if (pearson > minSimilarityThreshold)
                        {
                            strongSimilarityIndicatorCache.Add(new Tuple<int, int>(indexOfRow, indexOfNeighbor));
                            strongSimilarityIndicatorCache.Add(new Tuple<int, int>(indexOfNeighbor, indexOfRow));
                        }
                        similarityCache[new Tuple<int, int>(indexOfRow, indexOfNeighbor)] = pearson;
                        similarityCache[new Tuple<int, int>(indexOfNeighbor, indexOfRow)] = pearson;
                        break;
                }
            }
        }

        lock (lockMe)
        {
            foreach (var entry in similarityCache)
            {
                neighborsByObject_out.AddSimilarityData(entry.Key.Item1, entry.Key.Item2, entry.Value);
            }
            strongSimilarityIndicators_out.AddRange(strongSimilarityIndicatorCache);
        }
    });

    neighborsByObject = neighborsByObject_out;
    neighborsByObject.SortAndRemoveNeighbors();
    strongSimilarityIndicators = new HashSet<Tuple<int, int>>(strongSimilarityIndicators_out);
}
public string GetReadyForOrdinal(bool saveLoadedData = true)
{
    if (!ReadyForNumerical) { GetReadyForNumerical(); }
    if (ReadyForOrdinal) { return "Is ready."; }

    StringBuilder log = new StringBuilder();

    Utils.StartTimer();
    log.AppendLine(Utils.PrintHeading("Prepare preference relation data"));

    Console.WriteLine("Converting R_train into PR_train");
    log.AppendLine("Converting R_train into PR_train");
    PR_train = PrefRelations.CreateDiscrete(R_train);

    //Console.WriteLine("Converting R_test into PR_test");
    //log.AppendLine("Converting R_test into PR_test");
    //PR_test = PrefRelations.CreateDiscrete(R_test);

    log.AppendLine(Utils.StopTimer());

    #region Prepare similarity data
    if (File.Exists(GetDataFileName("USP"))
        && File.Exists(GetDataFileName("ISP"))
        && File.Exists(GetDataFileName("SSIIP")))
    {
        Utils.StartTimer();
        Utils.PrintHeading("Load user, item, indicators variables (Pref based)");
        UserSimilaritiesOfPref = Utils.IO<SimilarityData>.LoadObject(GetDataFileName("USP"));
        ItemSimilaritiesOfPref = Utils.IO<SimilarityData>.LoadObject(GetDataFileName("ISP"));
        StrongSimilarityIndicatorsByItemPref = Utils.IO<HashSet<Tuple<int, int>>>.LoadObject(GetDataFileName("SSIIP"));
        Utils.StopTimer();
    }
    else
    {
        Utils.StartTimer();
        Utils.PrintHeading("Compute user-user similarities (Pref based)");
        Metric.GetCosineOfPrefRelations(PR_train, MaxCountOfNeighbors,
            StrongSimilarityThreshold, out UserSimilaritiesOfPref);
        Utils.StopTimer();

        // For the moment, we use user-wise preferences to compute
        // item-item similarities; it is not the same as user-user pref similarities
        Utils.StartTimer();
        Utils.PrintHeading("Compute item-item similarities (Pref based)");
        DataMatrix PR_userwise_preferences = new DataMatrix(PR_train.GetPositionMatrix());
        Metric.GetPearsonOfColumns(PR_userwise_preferences, MaxCountOfNeighbors,
            StrongSimilarityThreshold, out ItemSimilaritiesOfPref,
            out StrongSimilarityIndicatorsByItemPref);
        Utils.StopTimer();

        if (saveLoadedData)
        {
            Utils.IO<SimilarityData>.SaveObject(UserSimilaritiesOfPref, GetDataFileName("USP"));
            Utils.IO<SimilarityData>.SaveObject(ItemSimilaritiesOfPref, GetDataFileName("ISP"));
            Utils.IO<HashSet<Tuple<int, int>>>.SaveObject(StrongSimilarityIndicatorsByItemPref,
                GetDataFileName("SSIIP"));
        }
        Utils.StopTimer();
    }
    #endregion

    ReadyForOrdinal = true;
    return log.ToString();
}
public string GetReadyForNumerical(bool saveLoadedData = true)
{
    if (ReadyForNumerical) { return "Is ready."; }

    StringBuilder log = new StringBuilder();

    Utils.StartTimer();
    log.AppendLine(Utils.PrintHeading("Create R_train/R_test sets from " + DataSetFile));
    Utils.LoadMovieLensSplitByCount(DataSetFile, out R_train, out R_test,
        MinCountOfRatings, MaxCountOfRatings, CountOfRatingsForTrain, ShuffleData, Seed);

    Console.WriteLine(R_train.DatasetBrief("Train set"));
    Console.WriteLine(R_test.DatasetBrief("Test set"));
    log.AppendLine(R_train.DatasetBrief("Train set"));
    log.AppendLine(R_test.DatasetBrief("Test set"));

    R_unknown = R_test.IndexesOfNonZeroElements();

    log.AppendLine(Utils.PrintValue("Relevant item criteria", RelevantItemCriteria.ToString("0.0")));
    RelevantItemsByUser = ItemRecommendationCore.GetRelevantItemsByUser(R_test, RelevantItemCriteria);
    log.AppendLine(Utils.PrintValue("Mean # of relevant items per user",
        RelevantItemsByUser.Average(k => k.Value.Count).ToString("0")));
    log.AppendLine(Utils.StopTimer());

    #region Prepare similarity data
    if (File.Exists(GetDataFileName("USR"))
        && File.Exists(GetDataFileName("ISR"))
        && File.Exists(GetDataFileName("SSIIR")))
    {
        Utils.StartTimer();
        Utils.PrintHeading("Load user-user similarities (rating based)");
        UserSimilaritiesOfRating = Utils.IO<SimilarityData>.LoadObject(GetDataFileName("USR"));
        Utils.StopTimer();

        Utils.StartTimer();
        Utils.PrintHeading("Load item-item similarities (rating based)");
        ItemSimilaritiesOfRating = Utils.IO<SimilarityData>.LoadObject(GetDataFileName("ISR"));
        Utils.StopTimer();

        Utils.StartTimer();
        Utils.PrintHeading("Load item-item strong similarity indicators (rating based)");
        StrongSimilarityIndicatorsByItemRating = Utils.IO<HashSet<Tuple<int, int>>>.LoadObject(GetDataFileName("SSIIR"));
        Utils.StopTimer();
    }
    else
    {
        Utils.StartTimer();
        Utils.PrintHeading("Compute user-user similarities (rating based)");
        Metric.GetPearsonOfRows(R_train, MaxCountOfNeighbors, StrongSimilarityThreshold,
            out UserSimilaritiesOfRating);
        if (saveLoadedData)
        {
            Utils.IO<SimilarityData>.SaveObject(UserSimilaritiesOfRating, GetDataFileName("USR"));
        }
        Utils.StopTimer();

        Utils.StartTimer();
        Utils.PrintHeading("Compute item-item similarities (rating based)");
        Metric.GetPearsonOfColumns(R_train, MaxCountOfNeighbors, StrongSimilarityThreshold,
            out ItemSimilaritiesOfRating, out StrongSimilarityIndicatorsByItemRating);
        if (saveLoadedData)
        {
            Utils.IO<SimilarityData>.SaveObject(ItemSimilaritiesOfRating, GetDataFileName("ISR"));
            Utils.IO<HashSet<Tuple<int, int>>>.SaveObject(StrongSimilarityIndicatorsByItemRating,
                GetDataFileName("SSIIR"));
        }
        Utils.StopTimer();
    }
    #endregion

    ReadyForNumerical = true;
    return log.ToString();
}
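// Illustrative call-order sketch (not part of the original code): GetReadyForOrdinal above
// already calls GetReadyForNumerical when needed, but invoking the two setup stages explicitly
// makes the preparation order clear. The method name ExamplePrepareData is an assumption, and
// the sketch assumes it is an instance method of the same engine class that defines
// GetReadyForNumerical and GetReadyForOrdinal.
public string ExamplePrepareData(bool saveLoadedData = true)
{
    StringBuilder log = new StringBuilder();
    log.Append(GetReadyForNumerical(saveLoadedData));   // rating-based split, similarities, indicators
    log.Append(GetReadyForOrdinal(saveLoadedData));     // preference relations and pref-based similarities
    return log.ToString();
}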
public static DataMatrix PredictRatings(PrefRelations PR_train, DataMatrix R_unknown,
    int K, SimilarityData neighborsByUser)
{
    Debug.Assert(PR_train.UserCount == R_unknown.UserCount);
    Debug.Assert(PR_train.ItemCount == R_unknown.ItemCount);

    // This matrix stores predictions
    DataMatrix R_predicted = new DataMatrix(R_unknown.UserCount, R_unknown.ItemCount);

    // This can be considered as the R_train in standard UserKNN
    SparseMatrix positionMatrix = PR_train.GetPositionMatrix();
    DataMatrix ratingMatrixFromPositions = new DataMatrix(positionMatrix);

    Vector<double> meanByUser = ratingMatrixFromPositions.GetUserMeans();
    Vector<double> meanByItem = ratingMatrixFromPositions.GetItemMeans();
    double globalMean = ratingMatrixFromPositions.GetGlobalMean();

    // Predict positions for each test user
    // Appears to be very fast, parallel.foreach is unnecessary
    foreach (Tuple<int, Vector<double>> user in R_unknown.Users)
    {
        int indexOfUser = user.Item1;
        Vector<double> indexesOfUnknownRatings = user.Item2;

        Utils.PrintEpoch("Predicting user/total", indexOfUser, PR_train.UserCount);

        // Note that there are more than K neighbors in the list (sorted by similarity);
        // we will use the top-K neighbors WHO HAVE RATED THE ITEM.
        // For example, we may keep 200 top neighbors and hope that K of them
        // have rated the item. We can't keep everyone in the neighbor list
        // because there are too many for large data sets.
        var topNeighborsOfUser = neighborsByUser[indexOfUser];

        double meanOfUser = meanByUser[indexOfUser];

        // Loop through each position to be predicted
        foreach (Tuple<int, double> unknownRating in indexesOfUnknownRatings.EnumerateIndexed(Zeros.AllowSkip))
        {
            int indexOfUnknownItem = unknownRating.Item1;

            // Compute the position of this item for the user
            // by combining the neighbors' positions on this item
            double weightedSum = 0;
            double weightSum = 0;
            int currentTopKCount = 0;
            foreach (KeyValuePair<int, double> neighbor in topNeighborsOfUser)
            {
                int indexOfNeighbor = neighbor.Key;
                double similarityOfNeighbor = neighbor.Value;
                double itemPositionOfNeighbor = ratingMatrixFromPositions[indexOfNeighbor, indexOfUnknownItem];

                // We count only if the neighbor has seen this item before
                if (itemPositionOfNeighbor != 0)
                {
                    // Recall that we use a constant to hold position value 0;
                    // we revert it back here
                    if (itemPositionOfNeighbor == Config.ZeroInSparseMatrix)
                    {
                        Debug.Assert(false, "By using the PositionShift constant, we should not be in here.");
                        itemPositionOfNeighbor = 0;
                    }
                    weightSum += similarityOfNeighbor;
                    weightedSum += (itemPositionOfNeighbor - meanByUser[indexOfNeighbor]) * similarityOfNeighbor;
                    currentTopKCount++;
                    if (currentTopKCount >= K) { break; }    // Stop when we have seen K neighbors
                }
            }

            // If any neighbor has seen this item
            if (currentTopKCount != 0)
            {
                // TODO: adding the user mean may improve the performance
                R_predicted[indexOfUser, indexOfUnknownItem] = meanOfUser + weightedSum / weightSum;
            }
            else
            {
                R_predicted[indexOfUser, indexOfUnknownItem] = globalMean;
            }
        }
    }

    return R_predicted;
}
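// Usage sketch (illustrative only, not part of the original code): the preference-relation
// variant mirrors the rating-based pipeline, pairing Metric.GetCosineOfPrefRelations with the
// PredictRatings(PrefRelations, ...) overload above. The wrapper name, the neighbor-list size
// 200, the threshold 0.3, and K = 50 are assumptions; the sketch assumes it lives in the same
// class as the overload it calls.
public static DataMatrix ExamplePrefKnnPrediction(PrefRelations PR_train, DataMatrix R_unknown)
{
    // User-user similarities computed from preference relations
    SimilarityData userSimilaritiesOfPref;
    Metric.GetCosineOfPrefRelations(PR_train, 200, 0.3, out userSimilaritiesOfPref);

    // Combine the top-K preference-similar neighbors' positions into predictions
    return PredictRatings(PR_train, R_unknown, 50, userSimilaritiesOfPref);
}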