/// <summary>
/// Computes the pairwise similarity between every pair of distinct objects in
/// <paramref name="PR"/> (indices 0 .. <c>PR.UserCount</c> - 1) using the requested metric.
/// </summary>
/// <remarks>
/// Each unordered pair is evaluated once, for i &gt; j, and its value is stored for both
/// (i, j) and (j, i) in <paramref name="neighborsByObject"/>.
/// NOTE(review): unlike the Matrix overload in this file, this overload records only the
/// (i, j) orientation in <paramref name="strongSimilarityIndicators"/> and does not call
/// SortAndRemoveNeighbors on the result - confirm whether that difference is intentional.
/// </remarks>
/// <param name="PR">Preference relations to compare; <c>PR.UserCount</c> gives the number of objects.</param>
/// <param name="similarityMetric">
/// Metric to apply. Only CosinePrefRelations is handled here; any other value yields no entries.
/// </param>
/// <param name="maxCountOfNeighbors">Forwarded to the <c>SimilarityData</c> constructor.</param>
/// <param name="minSimilarityThreshold">
/// A pair is recorded as strongly similar when its similarity is strictly greater than this value.
/// </param>
/// <param name="neighborsByObject">Receives the similarity data for all evaluated pairs.</param>
/// <param name="strongSimilarityIndicators">
/// Receives the (i, j) pairs, with i &gt; j, whose similarity exceeds the threshold.
/// </param>
private static void ComputeSimilarities(PrefRelations PR, Metric.SimilarityMetric similarityMetric, int maxCountOfNeighbors, double minSimilarityThreshold, out SimilarityData neighborsByObject, out HashSet<Tuple<int, int>> strongSimilarityIndicators)
{
    int dimension = PR.UserCount;
    HashSet<Tuple<int, int>> strongSimilarityIndicators_out = new HashSet<Tuple<int, int>>();
    SimilarityData neighborsByObject_out = new SimilarityData(maxCountOfNeighbors);
    Object lockMe = new Object();

    Parallel.For(0, dimension, i =>
    {
        Utils.PrintEpoch("Progress current/total", i, dimension);

        // Results of this row are buffered locally so the shared containers are
        // locked once per row instead of once per pair.
        List<Tuple<int, double>> rowSimilarities = new List<Tuple<int, double>>(i);

        // Lower triangle only: j < i also skips the self-similarity (i == j).
        for (int j = 0; j < i; j++)
        {
            switch (similarityMetric)
            {
                case SimilarityMetric.CosinePrefRelations:
                    rowSimilarities.Add(new Tuple<int, double>(j, Metric.cosinePR(PR, i, j)));
                    break;
                // More metrics to be added here.
            }
        }

        if (rowSimilarities.Count == 0)
        {
            return;
        }

        lock (lockMe)
        {
            foreach (Tuple<int, double> entry in rowSimilarities)
            {
                int j = entry.Item1;
                double similarity = entry.Item2;

                if (similarity > minSimilarityThreshold)
                {
                    strongSimilarityIndicators_out.Add(new Tuple<int, int>(i, j));
                }

                // Similarity is symmetric, so store it for both directions.
                neighborsByObject_out.AddSimilarityData(i, j, similarity);
                neighborsByObject_out.AddSimilarityData(j, i, similarity);
            }
        }
    });

    neighborsByObject = neighborsByObject_out;
    strongSimilarityIndicators = strongSimilarityIndicators_out;
}
/// <summary>
/// Computes the pairwise similarity between every pair of distinct rows of
/// <paramref name="R"/> using the requested rating-based metric.
/// </summary>
/// <remarks>
/// Each unordered pair of rows is evaluated once (row index &gt; neighbor index) and the
/// value is stored for both orientations. After all pairs are collected,
/// <c>SortAndRemoveNeighbors</c> is invoked on the resulting similarity data.
/// Rows are cast to <c>SparseVector</c> before being handed to the metric, so the cast
/// fails at runtime if the rows of <paramref name="R"/> are of another vector type.
/// </remarks>
/// <param name="R">Matrix whose rows are compared with each other.</param>
/// <param name="similarityMetric">
/// Metric to apply. CosineRating and PearsonRating are handled; any other value yields no entries.
/// </param>
/// <param name="maxCountOfNeighbors">Forwarded to the <c>SimilarityData</c> constructor.</param>
/// <param name="minSimilarityThreshold">
/// A pair is recorded as strongly similar when its similarity is strictly greater than this value.
/// </param>
/// <param name="neighborsByObject">Receives the similarity data for all evaluated pairs.</param>
/// <param name="strongSimilarityIndicators">
/// Receives, in both orientations, the index pairs whose similarity exceeds the threshold.
/// </param>
private static void ComputeSimilarities(Matrix<double> R, Metric.SimilarityMetric similarityMetric, int maxCountOfNeighbors, double minSimilarityThreshold, out SimilarityData neighborsByObject, out HashSet<Tuple<int, int>> strongSimilarityIndicators)
{
    int dimension = R.RowCount;
    List<Vector<double>> rows = new List<Vector<double>>(R.EnumerateRows());

    // I assume that the rows are enumerated from first to last.
    // The checks are skipped for an empty row list so they cannot index out of range.
    Debug.Assert(rows.Count == 0 || rows[0].Sum() == R.Row(0).Sum());
    Debug.Assert(rows.Count == 0 || rows[rows.Count - 1].Sum() == R.Row(rows.Count - 1).Sum());

    List<Tuple<int, int>> strongSimilarityIndicators_out = new List<Tuple<int, int>>();
    SimilarityData neighborsByObject_out = new SimilarityData(maxCountOfNeighbors);
    Object lockMe = new Object();

    Parallel.For(0, dimension, indexOfRow =>
    {
        Utils.PrintEpoch("Progress current/total", indexOfRow, dimension);

        // Per-row buffers: filled without locking, merged into the shared
        // containers under a single lock at the end of the row.
        List<Tuple<int, double>> similarityCache = new List<Tuple<int, double>>(indexOfRow);
        List<Tuple<int, int>> strongSimilarityIndicatorCache = new List<Tuple<int, int>>();

        // Lower triangle only: the bound also skips the self-similarity.
        for (int indexOfNeighbor = 0; indexOfNeighbor < indexOfRow; indexOfNeighbor++)
        {
            double similarity;
            switch (similarityMetric)
            {
                case Metric.SimilarityMetric.CosineRating:
                    // TODO: make a note that it really matters to make it sparse, it computes differently!
                    similarity = Metric.CosineR((SparseVector)rows[indexOfRow], (SparseVector)rows[indexOfNeighbor]);
                    break;
                case Metric.SimilarityMetric.PearsonRating:
                    similarity = Metric.PearsonR((SparseVector)rows[indexOfRow], (SparseVector)rows[indexOfNeighbor]);
                    break;
                default:
                    // Metrics not handled by this overload contribute nothing.
                    continue;
            }

            if (similarity > minSimilarityThreshold)
            {
                strongSimilarityIndicatorCache.Add(new Tuple<int, int>(indexOfRow, indexOfNeighbor));
                strongSimilarityIndicatorCache.Add(new Tuple<int, int>(indexOfNeighbor, indexOfRow));
            }

            similarityCache.Add(new Tuple<int, double>(indexOfNeighbor, similarity));
        }

        lock (lockMe)
        {
            foreach (Tuple<int, double> entry in similarityCache)
            {
                // Similarity is symmetric, so store it for both directions.
                neighborsByObject_out.AddSimilarityData(indexOfRow, entry.Item1, entry.Item2);
                neighborsByObject_out.AddSimilarityData(entry.Item1, indexOfRow, entry.Item2);
            }
            strongSimilarityIndicators_out.AddRange(strongSimilarityIndicatorCache);
        }
    });

    neighborsByObject = neighborsByObject_out;
    neighborsByObject.SortAndRemoveNeighbors();
    strongSimilarityIndicators = new HashSet<Tuple<int, int>>(strongSimilarityIndicators_out);
}
/// <summary>
/// Computes the pairwise similarity between every pair of distinct rows of
/// <paramref name="R"/> using the requested rating-based metric.
/// </summary>
/// <remarks>
/// Each unordered pair of rows is evaluated once (row index &gt; neighbor index) and the
/// value is stored for both orientations. After all pairs are collected,
/// <c>SortAndRemoveNeighbors</c> is invoked on the resulting similarity data.
/// Rows are cast to <c>SparseVector</c> before being handed to the metric, so the cast
/// fails at runtime if the rows of <paramref name="R"/> are of another vector type.
/// NOTE(review): a method with this exact signature appears twice in the visible source;
/// if both live in the same type, one of them has to be removed - confirm.
/// </remarks>
/// <param name="R">Matrix whose rows are compared with each other.</param>
/// <param name="similarityMetric">
/// Metric to apply. CosineRating and PearsonRating are handled; any other value yields no entries.
/// </param>
/// <param name="maxCountOfNeighbors">Forwarded to the <c>SimilarityData</c> constructor.</param>
/// <param name="minSimilarityThreshold">
/// A pair is recorded as strongly similar when its similarity is strictly greater than this value.
/// </param>
/// <param name="neighborsByObject">Receives the similarity data for all evaluated pairs.</param>
/// <param name="strongSimilarityIndicators">
/// Receives, in both orientations, the index pairs whose similarity exceeds the threshold.
/// </param>
private static void ComputeSimilarities(Matrix<double> R, Metric.SimilarityMetric similarityMetric, int maxCountOfNeighbors, double minSimilarityThreshold, out SimilarityData neighborsByObject, out HashSet<Tuple<int, int>> strongSimilarityIndicators)
{
    int dimension = R.RowCount;
    List<Vector<double>> rows = new List<Vector<double>>(R.EnumerateRows());

    // I assume that the rows are enumerated from first to last.
    // The checks are skipped for an empty row list so they cannot index out of range.
    Debug.Assert(rows.Count == 0 || rows[0].Sum() == R.Row(0).Sum());
    Debug.Assert(rows.Count == 0 || rows[rows.Count - 1].Sum() == R.Row(rows.Count - 1).Sum());

    List<Tuple<int, int>> strongSimilarityIndicators_out = new List<Tuple<int, int>>();
    SimilarityData neighborsByObject_out = new SimilarityData(maxCountOfNeighbors);
    Object lockMe = new Object();

    Parallel.For(0, dimension, indexOfRow =>
    {
        Utils.PrintEpoch("Progress current/total", indexOfRow, dimension);

        // Per-row buffers: filled without locking, merged into the shared
        // containers under a single lock at the end of the row.
        List<Tuple<int, double>> similarityCache = new List<Tuple<int, double>>(indexOfRow);
        List<Tuple<int, int>> strongSimilarityIndicatorCache = new List<Tuple<int, int>>();

        // Lower triangle only: the bound also skips the self-similarity.
        for (int indexOfNeighbor = 0; indexOfNeighbor < indexOfRow; indexOfNeighbor++)
        {
            double similarity;
            switch (similarityMetric)
            {
                case Metric.SimilarityMetric.CosineRating:
                    // TODO: make a note that it really matters to make it sparse, it computes differently!
                    similarity = Metric.CosineR((SparseVector)rows[indexOfRow], (SparseVector)rows[indexOfNeighbor]);
                    break;
                case Metric.SimilarityMetric.PearsonRating:
                    similarity = Metric.PearsonR((SparseVector)rows[indexOfRow], (SparseVector)rows[indexOfNeighbor]);
                    break;
                default:
                    // Metrics not handled by this overload contribute nothing.
                    continue;
            }

            if (similarity > minSimilarityThreshold)
            {
                strongSimilarityIndicatorCache.Add(new Tuple<int, int>(indexOfRow, indexOfNeighbor));
                strongSimilarityIndicatorCache.Add(new Tuple<int, int>(indexOfNeighbor, indexOfRow));
            }

            similarityCache.Add(new Tuple<int, double>(indexOfNeighbor, similarity));
        }

        lock (lockMe)
        {
            foreach (Tuple<int, double> entry in similarityCache)
            {
                // Similarity is symmetric, so store it for both directions.
                neighborsByObject_out.AddSimilarityData(indexOfRow, entry.Item1, entry.Item2);
                neighborsByObject_out.AddSimilarityData(entry.Item1, indexOfRow, entry.Item2);
            }
            strongSimilarityIndicators_out.AddRange(strongSimilarityIndicatorCache);
        }
    });

    neighborsByObject = neighborsByObject_out;
    neighborsByObject.SortAndRemoveNeighbors();
    strongSimilarityIndicators = new HashSet<Tuple<int, int>>(strongSimilarityIndicators_out);
}