예제 #1
0
파일: Metric.cs 프로젝트: lawrencewu/RecSys
     /// <summary>
     /// Cosine similarity over preference relations.
     /// Forwards to <c>ComputeSimilarities</c> with <c>SimilarityMetric.CosinePrefRelations</c>;
     /// the strong-similarity indicator set that call produces is not exposed to the caller.
     /// </summary>
     /// <param name="PR">Preference relations to compare.</param>
     /// <param name="maxCountOfNeighbors">Forwarded unchanged to <c>ComputeSimilarities</c>.</param>
     /// <param name="strongSimilarityThreshold">Forwarded as the minimum similarity threshold.</param>
     /// <param name="neighborsByObject">Receives the similarity data produced by <c>ComputeSimilarities</c>.</param>
     public static void GetCosineOfPrefRelations(PrefRelations PR, int maxCountOfNeighbors,
         double strongSimilarityThreshold, out SimilarityData neighborsByObject)
     {
         // The shared routine always fills an indicator set; this entry point has no use for it.
         HashSet<Tuple<int, int>> discardedIndicators;

         ComputeSimilarities(
             PR,
             SimilarityMetric.CosinePrefRelations,
             maxCountOfNeighbors,
             strongSimilarityThreshold,
             out neighborsByObject,
             out discardedIndicators);
     }
예제 #2
0
파일: Metric.cs 프로젝트: wubinzzu/RecSys
        /// <summary>
        /// Pearson similarity between the columns of <paramref name="R"/>.
        /// The matrix is transposed and handed to <c>ComputeSimilarities</c> with
        /// <c>SimilarityMetric.PearsonRating</c>.
        /// </summary>
        /// <param name="R">Data matrix whose columns are compared.</param>
        /// <param name="maxCountOfNeighbors">Forwarded unchanged to <c>ComputeSimilarities</c>.</param>
        /// <param name="strongSimilarityThreshold">Forwarded as the minimum similarity threshold.</param>
        /// <param name="neighborsByObject">Receives the similarity data produced by <c>ComputeSimilarities</c>.</param>
        /// <param name="strongSimilarityIndicators">Receives the index pairs flagged as strongly similar.</param>
        public static void GetPearsonOfColumns(DataMatrix R, int maxCountOfNeighbors,
                                               double strongSimilarityThreshold, out SimilarityData neighborsByObject,
                                               out HashSet <Tuple <int, int> > strongSimilarityIndicators)
        {
            ComputeSimilarities(R.Matrix.Transpose(), SimilarityMetric.PearsonRating, maxCountOfNeighbors,
                                strongSimilarityThreshold, out neighborsByObject, out strongSimilarityIndicators);

#if DEBUG
            // Debug-only cross-check of the first (at most) 100 columns: every pair whose
            // independently computed Pearson correlation exceeds the threshold must be present
            // in the indicator set in both directions.
            // Debug.Assert is compiled out of release builds, so without this guard the loop
            // would still compute every correlation there while verifying nothing.
            for (int i = 0; i < R.ItemCount && i < 100; i++)
            {
                for (int j = 0; j < R.ItemCount && j < 100; j++)
                {
                    if (i == j)
                    {
                        continue;
                    }
                    double corr_ij = Correlation.Pearson((SparseVector)R.Matrix.Column(i), (SparseVector)R.Matrix.Column(j));
                    if (corr_ij > strongSimilarityThreshold)
                    {
                        Debug.Assert(strongSimilarityIndicators.Contains(new Tuple <int, int>(i, j)));
                        Debug.Assert(strongSimilarityIndicators.Contains(new Tuple <int, int>(j, i)));
                    }
                }
            }
#endif
        }
예제 #3
0
파일: Metric.cs 프로젝트: lawrencewu/RecSys
 /// <summary>
 /// Cosine similarity between the rows of <paramref name="R"/>.
 /// Passes <c>R.Matrix</c> to <c>ComputeSimilarities</c> with <c>SimilarityMetric.CosineRating</c>
 /// and drops the strong-similarity indicator set that call returns.
 /// </summary>
 /// <param name="R">Data matrix whose rows are compared.</param>
 /// <param name="maxCountOfNeighbors">Forwarded unchanged to <c>ComputeSimilarities</c>.</param>
 /// <param name="strongSimilarityThreshold">Forwarded as the minimum similarity threshold.</param>
 /// <param name="neighborsByObject">Receives the similarity data produced by <c>ComputeSimilarities</c>.</param>
 public static void GetCosineOfRows(DataMatrix R, int maxCountOfNeighbors,
     double strongSimilarityThreshold, out SimilarityData neighborsByObject)
 {
     // Required by the callee's signature only; never read afterwards.
     HashSet<Tuple<int, int>> unusedIndicators;
     var ratings = R.Matrix;

     ComputeSimilarities(
         ratings,
         SimilarityMetric.CosineRating,
         maxCountOfNeighbors,
         strongSimilarityThreshold,
         out neighborsByObject,
         out unusedIndicators);
 }
예제 #4
0
파일: Metric.cs 프로젝트: lawrencewu/RecSys
 /// <summary>
 /// Cosine similarity between the columns of <paramref name="R"/>.
 /// The matrix is transposed so that the row-wise <c>ComputeSimilarities</c> routine
 /// (called with <c>SimilarityMetric.CosineRating</c>) compares columns instead.
 /// </summary>
 /// <param name="R">Data matrix whose columns are compared.</param>
 /// <param name="maxCountOfNeighbors">Forwarded unchanged to <c>ComputeSimilarities</c>.</param>
 /// <param name="strongSimilarityThreshold">Forwarded as the minimum similarity threshold.</param>
 /// <param name="neighborsByObject">Receives the similarity data produced by <c>ComputeSimilarities</c>.</param>
 /// <param name="strongSimilarityIndicators">Receives the index pairs flagged as strongly similar.</param>
 public static void GetCosineOfColumns(DataMatrix R, int maxCountOfNeighbors,
     double strongSimilarityThreshold, out SimilarityData neighborsByObject,
     out HashSet<Tuple<int, int>> strongSimilarityIndicators)
 {
     // Columns of R become rows of the transpose.
     var columnsAsRows = R.Matrix.Transpose();

     ComputeSimilarities(
         columnsAsRows,
         SimilarityMetric.CosineRating,
         maxCountOfNeighbors,
         strongSimilarityThreshold,
         out neighborsByObject,
         out strongSimilarityIndicators);
 }
예제 #5
0
파일: Metric.cs 프로젝트: wubinzzu/RecSys
 /// <summary>
 /// Column-wise cosine similarity for <paramref name="R"/>.
 /// Implemented by transposing <c>R.Matrix</c> and delegating to <c>ComputeSimilarities</c>
 /// with <c>SimilarityMetric.CosineRating</c>.
 /// </summary>
 /// <param name="R">Data matrix whose columns are compared.</param>
 /// <param name="maxCountOfNeighbors">Forwarded unchanged to <c>ComputeSimilarities</c>.</param>
 /// <param name="strongSimilarityThreshold">Forwarded as the minimum similarity threshold.</param>
 /// <param name="neighborsByObject">Receives the similarity data produced by <c>ComputeSimilarities</c>.</param>
 /// <param name="strongSimilarityIndicators">Receives the index pairs flagged as strongly similar.</param>
 public static void GetCosineOfColumns(DataMatrix R, int maxCountOfNeighbors,
                                       double strongSimilarityThreshold, out SimilarityData neighborsByObject,
                                       out HashSet <Tuple <int, int> > strongSimilarityIndicators)
 {
     // After the transpose, each former column is a row the shared routine can compare.
     var transposed = R.Matrix.Transpose();

     ComputeSimilarities(transposed,
                         SimilarityMetric.CosineRating,
                         maxCountOfNeighbors,
                         strongSimilarityThreshold,
                         out neighborsByObject,
                         out strongSimilarityIndicators);
 }
예제 #6
0
파일: Metric.cs 프로젝트: wubinzzu/RecSys
        /// <summary>
        /// Row-wise cosine similarity for <paramref name="R"/>.
        /// Delegates to <c>ComputeSimilarities</c> with <c>SimilarityMetric.CosineRating</c>;
        /// the indicator set returned by that call is ignored.
        /// </summary>
        /// <param name="R">Data matrix whose rows are compared.</param>
        /// <param name="maxCountOfNeighbors">Forwarded unchanged to <c>ComputeSimilarities</c>.</param>
        /// <param name="strongSimilarityThreshold">Forwarded as the minimum similarity threshold.</param>
        /// <param name="neighborsByObject">Receives the similarity data produced by <c>ComputeSimilarities</c>.</param>
        public static void GetCosineOfRows(DataMatrix R, int maxCountOfNeighbors,
                                           double strongSimilarityThreshold, out SimilarityData neighborsByObject)
        {
            // Placeholder for the out argument this overload does not surface.
            HashSet <Tuple <int, int> > ignoredIndicators;
            var rowMatrix = R.Matrix;

            ComputeSimilarities(rowMatrix,
                                SimilarityMetric.CosineRating,
                                maxCountOfNeighbors,
                                strongSimilarityThreshold,
                                out neighborsByObject,
                                out ignoredIndicators);
        }
예제 #7
0
파일: Metric.cs 프로젝트: wubinzzu/RecSys
        /// <summary>
        /// Computes pairwise similarities between the users of a preference-relation set,
        /// in parallel over users, using the requested metric.
        /// </summary>
        /// <param name="PR">Preference relations; <c>PR.UserCount</c> is the number of objects compared.</param>
        /// <param name="similarityMetric">
        /// Metric selector. Only <c>SimilarityMetric.CosinePrefRelations</c> is handled;
        /// for any other value nothing is added to either output.
        /// </param>
        /// <param name="maxCountOfNeighbors">Passed to the <c>SimilarityData</c> constructor.</param>
        /// <param name="minSimilarityThreshold">A pair is flagged as strongly similar when its similarity is strictly greater than this.</param>
        /// <param name="neighborsByObject">Receives the similarities; each computed value is stored for (i, j) and (j, i).</param>
        /// <param name="strongSimilarityIndicators">Receives the flagged pairs, stored as (i, j) with i &gt; j only.</param>
        private static void ComputeSimilarities(PrefRelations PR,
                                                Metric.SimilarityMetric similarityMetric, int maxCountOfNeighbors,
                                                double minSimilarityThreshold, out SimilarityData neighborsByObject,
                                                out HashSet <Tuple <int, int> > strongSimilarityIndicators)
        {
            // Out parameters cannot be captured by the lambda below, so results are
            // accumulated in locals and handed over at the end.
            int userCount = PR.UserCount;
            SimilarityData similarities = new SimilarityData(maxCountOfNeighbors);
            HashSet <Tuple <int, int> > strongPairs = new HashSet <Tuple <int, int> >();
            Object gate = new Object();

            Parallel.For(0, userCount, row =>
            {
                Utils.PrintEpoch("Progress current/total", row, userCount);

                // Strict lower triangle only: self-similarity is skipped and the upper
                // triangle is covered by mirroring each value below.
                for (int column = 0; column < row; column++)
                {
                    if (similarityMetric != SimilarityMetric.CosinePrefRelations)
                    {
                        // More metrics to be added here.
                        continue;
                    }

                    // Computed outside the lock; only the shared containers are guarded.
                    double similarity = Metric.cosinePR(PR, row, column);

                    lock (gate)
                    {
                        // NOTE(review): only (row, column) is flagged here, not the mirrored
                        // pair, and the result is not sorted/trimmed in this method — confirm
                        // that callers do not expect either.
                        if (similarity > minSimilarityThreshold)
                        {
                            strongPairs.Add(new Tuple <int, int>(row, column));
                        }
                        similarities.AddSimilarityData(row, column, similarity);
                        similarities.AddSimilarityData(column, row, similarity);
                    }
                }
            });

            neighborsByObject          = similarities;
            strongSimilarityIndicators = strongPairs;
        }
예제 #8
0
파일: Metric.cs 프로젝트: lawrencewu/RecSys
        /// <summary>
        /// Pearson similarity between the columns of <paramref name="R"/>.
        /// The matrix is transposed and handed to <c>ComputeSimilarities</c> with
        /// <c>SimilarityMetric.PearsonRating</c>.
        /// </summary>
        /// <param name="R">Data matrix whose columns are compared.</param>
        /// <param name="maxCountOfNeighbors">Forwarded unchanged to <c>ComputeSimilarities</c>.</param>
        /// <param name="strongSimilarityThreshold">Forwarded as the minimum similarity threshold.</param>
        /// <param name="neighborsByObject">Receives the similarity data produced by <c>ComputeSimilarities</c>.</param>
        /// <param name="strongSimilarityIndicators">Receives the index pairs flagged as strongly similar.</param>
        public static void GetPearsonOfColumns(DataMatrix R, int maxCountOfNeighbors,
            double strongSimilarityThreshold, out SimilarityData neighborsByObject,
            out HashSet<Tuple<int, int>> strongSimilarityIndicators)
        {
            ComputeSimilarities(R.Matrix.Transpose(), SimilarityMetric.PearsonRating, maxCountOfNeighbors,
                strongSimilarityThreshold, out neighborsByObject, out strongSimilarityIndicators);

#if DEBUG
            // Debug-only cross-check of the first (at most) 100 columns: every pair whose
            // independently computed Pearson correlation exceeds the threshold must be present
            // in the indicator set in both directions.
            // Debug.Assert is compiled out of release builds, so without this guard the loop
            // would still compute every correlation there while verifying nothing.
            for (int i = 0; i < R.ItemCount && i < 100; i++)
            {
                for (int j = 0; j < R.ItemCount && j < 100; j++)
                {
                    if (i == j) { continue; }

                    double corr_ij = Correlation.Pearson((SparseVector)R.Matrix.Column(i), (SparseVector)R.Matrix.Column(j));
                    if (corr_ij > strongSimilarityThreshold)
                    {
                        Debug.Assert(strongSimilarityIndicators.Contains(new Tuple<int, int>(i, j)));
                        Debug.Assert(strongSimilarityIndicators.Contains(new Tuple<int, int>(j, i)));
                    }
                }
            }
#endif
        }
예제 #9
0
        /// <summary>
        /// The user-based KNN collaborative filtering described in paper:
        /// Resnick, P., et al., "GroupLens: an open architecture for collaborative filtering of netnews", 1994.
        /// Link: http://dx.doi.org/10.1145/192844.192905
        /// </summary>
        /// <param name="R_train">Training ratings. A stored value of 0 is treated as "neighbor has not rated the item".</param>
        /// <param name="R_unknown">Matrix whose per-user entries identify the (user, item) cells to predict; must have the same dimensions as <paramref name="R_train"/>.</param>
        /// <param name="neighborsByUser">Neighbor lists indexed by user; iterated in stored order (presumably sorted by descending similarity — confirm with the producer).</param>
        /// <param name="K">Maximum number of neighbors that have rated the item to combine per prediction.</param>
        /// <returns>A matrix with the dimensions of <paramref name="R_unknown"/> holding one prediction per requested cell, capped to the configured rating range.</returns>
        public static DataMatrix PredictRatings(DataMatrix R_train, DataMatrix R_unknown, SimilarityData neighborsByUser, int K)
        {
            // Debug
            Debug.Assert(R_train.UserCount == R_unknown.UserCount);
            Debug.Assert(R_train.ItemCount == R_unknown.ItemCount);
            int cappedCount = 0, globalMeanCount = 0;

            // This matrix stores predictions
            DataMatrix R_predicted = new DataMatrix(R_unknown.UserCount, R_unknown.ItemCount);

            // Basic statistics from train set
            double globalMean = R_train.GetGlobalMean();
            Vector<double> meanByUser = R_train.GetUserMeans();

            // Users are predicted in parallel. Everything shared and mutable (the prediction
            // matrix and the two statistics counters) is only touched while holding lockMe.
            Object lockMe = new Object();
            Parallel.ForEach(R_unknown.Users, user =>
            {
                int indexOfUser = user.Item1;
                RatingVector unknownRatings = new RatingVector(user.Item2);

                Utils.PrintEpoch("Predicting user/total", indexOfUser, R_train.UserCount);

                // Note that there are more than K neighbors in the list (sorted by similarity)
                // we will use the top-K neighbors WHO HAVE RATED THE ITEM
                // For example we have 200 top neighbors, and we hope there are
                // K neighbors in the list have rated the item. We can't keep
                // everyone in the neighbor list because there are too many for large data sets
                var topNeighborsOfUser = neighborsByUser[indexOfUser];

                double meanOfUser = meanByUser[indexOfUser];

                // Loop through each rating to be predicted
                foreach (Tuple<int, double> unknownRating in unknownRatings.Ratings)
                {
                    int itemIndex = unknownRating.Item1;
                    double prediction;

                    // Compute the average rating on the item given by the top K neighbors
                    // that have rated it. Each rating is offset by the neighbor's average
                    // and weighted by the similarity.
                    double weightedSum = 0;
                    double weightSum = 0;
                    int currentTopKCount = 0;
                    foreach (KeyValuePair<int, double> neighbor in topNeighborsOfUser)
                    {
                        int neighborIndex = neighbor.Key;
                        double similarityOfNeighbor = neighbor.Value;
                        double itemRatingOfNeighbor = R_train[neighborIndex, itemIndex];

                        // We count only if the neighbor has seen this item before
                        if (itemRatingOfNeighbor != 0)
                        {
                            weightSum += similarityOfNeighbor;
                            weightedSum += (itemRatingOfNeighbor - meanByUser[neighborIndex]) * similarityOfNeighbor;
                            currentTopKCount++;
                            if (currentTopKCount >= K) { break; }   // Stop when we have seen K neighbors
                        }
                    }

                    // A zero weightedSum is treated as a cold item and gets the global mean.
                    // A zero weightSum is routed to the same default: dividing by it would
                    // yield +/-Infinity, which the capping below would turn into an extreme rating.
                    // NOTE(review): weightedSum can also be zero when neighbors rated the item
                    // exactly at their own mean; that case keeps the original default behavior.
                    bool usedGlobalMean = false;
                    if (weightedSum != 0 && weightSum != 0)
                    {
                        prediction = meanOfUser + weightedSum / weightSum;
                    }
                    else
                    {
                        prediction = globalMean;
                        usedGlobalMean = true;
                    }

                    // Cap the ratings
                    int capsApplied = 0;
                    if (prediction > Config.Ratings.MaxRating)
                    {
                        capsApplied++;
                        prediction = Config.Ratings.MaxRating;
                    }
                    if (prediction < Config.Ratings.MinRating)
                    {
                        capsApplied++;
                        prediction = Config.Ratings.MinRating;
                    }

                    lock (lockMe)
                    {
                        R_predicted[indexOfUser, itemIndex] = prediction;

                        // The counters are shared across worker threads; updating them here,
                        // under the lock, prevents lost increments.
                        cappedCount += capsApplied;
                        if (usedGlobalMean) { globalMeanCount++; }
                    }
                }
            });
            Utils.PrintValue("# capped predictions", cappedCount.ToString("D"));
            Utils.PrintValue("# default predictions", globalMeanCount.ToString("D"));
            return R_predicted;
        }
예제 #10
0
파일: Metric.cs 프로젝트: lawrencewu/RecSys
        /// <summary>
        /// Computes pairwise similarities between the rows of <paramref name="R"/>, in parallel
        /// over rows. Each worker buffers its results locally and merges them under a lock.
        /// </summary>
        /// <param name="R">Matrix whose rows are compared; rows are cast to <c>SparseVector</c> before the metric is applied.</param>
        /// <param name="similarityMetric">
        /// <c>CosineRating</c> or <c>PearsonRating</c>; for any other value no pair is recorded.
        /// </param>
        /// <param name="maxCountOfNeighbors">Passed to the <c>SimilarityData</c> constructor.</param>
        /// <param name="minSimilarityThreshold">A pair is flagged as strongly similar when its similarity is strictly greater than this.</param>
        /// <param name="neighborsByObject">Receives the similarities (both directions per pair) after <c>SortAndRemoveNeighbors</c> has been called on it.</param>
        /// <param name="strongSimilarityIndicators">Receives the flagged pairs, in both directions.</param>
        private static void ComputeSimilarities(Matrix<double> R, 
            Metric.SimilarityMetric similarityMetric, int maxCountOfNeighbors,
            double minSimilarityThreshold,  out SimilarityData neighborsByObject,
            out HashSet<Tuple<int, int>> strongSimilarityIndicators)
        {
            int rowCount = R.RowCount;
            List<Vector<double>> rowVectors = new List<Vector<double>>(R.EnumerateRows());

            // The code below relies on rows being enumerated in index order;
            // spot-check the first and the last one.
            Debug.Assert(rowVectors[0].Sum() == R.Row(0).Sum());
            Debug.Assert(rowVectors[rowVectors.Count - 1].Sum() == R.Row(rowVectors.Count - 1).Sum());

            // Out parameters cannot be captured by the lambda, hence the local accumulators.
            List<Tuple<int, int>> allStrongPairs = new List<Tuple<int, int>>();
            SimilarityData similarities = new SimilarityData(maxCountOfNeighbors);
            Object gate = new Object();

            Parallel.For(0, rowCount, row =>
            {
                Utils.PrintEpoch("Progress current/total", row, rowCount);

                // Per-row buffers keep the lock out of the inner loop.
                Dictionary<Tuple<int, int>, double> rowSimilarities = new Dictionary<Tuple<int, int>, double>();
                List<Tuple<int, int>> rowStrongPairs = new List<Tuple<int, int>>();

                // Strict lower triangle only: self-similarity is skipped and the
                // mirrored entry is written explicitly for each pair.
                for (int neighbor = 0; neighbor < row; neighbor++)
                {
                    double similarity;
                    if (similarityMetric == Metric.SimilarityMetric.CosineRating)
                    {
                        // TODO: make a note that it really matters to make it sparse, it computes differently!
                        similarity = Metric.CosineR((SparseVector)rowVectors[row], (SparseVector)rowVectors[neighbor]);
                    }
                    else if (similarityMetric == Metric.SimilarityMetric.PearsonRating)
                    {
                        similarity = Metric.PearsonR((SparseVector)rowVectors[row], (SparseVector)rowVectors[neighbor]);
                    }
                    else
                    {
                        // Metric not handled by this overload: record nothing.
                        continue;
                    }

                    Tuple<int, int> forward = new Tuple<int, int>(row, neighbor);
                    Tuple<int, int> backward = new Tuple<int, int>(neighbor, row);

                    if (similarity > minSimilarityThreshold)
                    {
                        rowStrongPairs.Add(forward);
                        rowStrongPairs.Add(backward);
                    }
                    rowSimilarities[forward] = similarity;
                    rowSimilarities[backward] = similarity;
                }

                // Merge this row's buffers into the shared results.
                lock (gate)
                {
                    foreach (KeyValuePair<Tuple<int, int>, double> pair in rowSimilarities)
                    {
                        similarities.AddSimilarityData(pair.Key.Item1, pair.Key.Item2, pair.Value);
                    }
                    allStrongPairs.AddRange(rowStrongPairs);
                }
            });

            neighborsByObject = similarities;
            neighborsByObject.SortAndRemoveNeighbors();
            strongSimilarityIndicators = new HashSet<Tuple<int, int>>(allStrongPairs);
        }
예제 #11
0
파일: Metric.cs 프로젝트: lawrencewu/RecSys
        /// <summary>
        /// Computes pairwise similarities between the users of a preference-relation set,
        /// in parallel over users, using the requested metric.
        /// </summary>
        /// <param name="PR">Preference relations; <c>PR.UserCount</c> is the number of objects compared.</param>
        /// <param name="similarityMetric">
        /// Metric selector. Only <c>SimilarityMetric.CosinePrefRelations</c> is handled;
        /// for any other value nothing is added to either output.
        /// </param>
        /// <param name="maxCountOfNeighbors">Passed to the <c>SimilarityData</c> constructor.</param>
        /// <param name="minSimilarityThreshold">A pair is flagged as strongly similar when its similarity is strictly greater than this.</param>
        /// <param name="neighborsByObject">Receives the similarities; each computed value is stored for (i, j) and (j, i).</param>
        /// <param name="strongSimilarityIndicators">Receives the flagged pairs, stored as (i, j) with i &gt; j only.</param>
        private static void ComputeSimilarities(PrefRelations PR,
            Metric.SimilarityMetric similarityMetric, int maxCountOfNeighbors,
                        double minSimilarityThreshold, out SimilarityData neighborsByObject,
            out HashSet<Tuple<int, int>> strongSimilarityIndicators)
        {
            // Results are gathered in locals because out parameters cannot be
            // captured by the lambda passed to Parallel.For.
            int userCount = PR.UserCount;
            SimilarityData similarities = new SimilarityData(maxCountOfNeighbors);
            HashSet<Tuple<int, int>> strongPairs = new HashSet<Tuple<int, int>>();
            Object gate = new Object();

            Parallel.For(0, userCount, row =>
            {
                Utils.PrintEpoch("Progress current/total", row, userCount);

                // Walk the strict lower triangle; self-similarity and the upper
                // triangle are never computed directly.
                for (int column = 0; column < row; column++)
                {
                    if (similarityMetric != SimilarityMetric.CosinePrefRelations)
                    {
                        // More metrics to be added here.
                        continue;
                    }

                    // The metric itself runs outside the lock.
                    double similarity = Metric.cosinePR(PR, row, column);

                    lock (gate)
                    {
                        // NOTE(review): only (row, column) is flagged here, not the mirrored
                        // pair, and the result is not sorted/trimmed in this method — confirm
                        // that callers do not expect either.
                        if (similarity > minSimilarityThreshold)
                        {
                            strongPairs.Add(new Tuple<int, int>(row, column));
                        }
                        similarities.AddSimilarityData(row, column, similarity);
                        similarities.AddSimilarityData(column, row, similarity);
                    }
                }
            });

            neighborsByObject = similarities;
            strongSimilarityIndicators = strongPairs;
        }
예제 #12
0
        /// <summary>
        /// Prepares the preference-relation (ordinal) data: makes sure the numerical data is
        /// ready, converts <c>R_train</c> into <c>PR_train</c>, and then either loads the
        /// preference-based similarity data from its data files or computes it.
        /// </summary>
        /// <param name="saveLoadedData">
        /// When similarity data has to be computed, also save it to the data files.
        /// The same choice is forwarded to <c>GetReadyForNumerical</c> if that step still has to run.
        /// </param>
        /// <returns>
        /// "Is ready." when the ordinal data was already prepared; otherwise the log text of the
        /// conversion stage. Output of the similarity stage is printed by <c>Utils</c> but is not
        /// appended to the returned log.
        /// </returns>
        public string GetReadyForOrdinal(bool saveLoadedData = true)
        {
            // Forward the caller's choice: calling the parameterless form here would fall back
            // to the default (true) and write files even when the caller asked not to save.
            if (!ReadyForNumerical) { GetReadyForNumerical(saveLoadedData); }
            if (ReadyForOrdinal) { return "Is ready."; }

            StringBuilder log = new StringBuilder();
            Utils.StartTimer();
            log.AppendLine(Utils.PrintHeading("Prepare preferecen relation data"));

            Console.WriteLine("Converting R_train into PR_train");
            log.AppendLine("Converting R_train into PR_train");
            PR_train = PrefRelations.CreateDiscrete(R_train);

            // Conversion of the test set is currently disabled.
            //Console.WriteLine("Converting R_test into PR_test");
            //log.AppendLine("Converting R_test into PR_test");
            //PR_test = PrefRelations.CreateDiscrete(R_test);

            log.AppendLine(Utils.StopTimer());

            #region Prepare similarity data
            // The cached data is used only when all three files are present.
            if (File.Exists(GetDataFileName("USP"))
                && File.Exists(GetDataFileName("ISP"))
                && File.Exists(GetDataFileName("SSIIP")))
            {

                Utils.StartTimer();
                Utils.PrintHeading("Load user, item, indicators variables (Pref based)");
                UserSimilaritiesOfPref = Utils.IO<SimilarityData>.LoadObject(GetDataFileName("USP"));
                ItemSimilaritiesOfPref = Utils.IO<SimilarityData>.LoadObject(GetDataFileName("ISP"));
                StrongSimilarityIndicatorsByItemPref = Utils.IO<HashSet<Tuple<int, int>>>.LoadObject(GetDataFileName("SSIIP"));
                Utils.StopTimer();
            }
            else
            {
                Utils.StartTimer();
                Utils.PrintHeading("Compute user-user similarities (Pref based)");
                Metric.GetCosineOfPrefRelations(PR_train, MaxCountOfNeighbors, 
                    StrongSimilarityThreshold, out UserSimilaritiesOfPref);
                Utils.StopTimer();

                // For the moment, we use user-wise preferences to compute
                // item-item similarities, it is not the same as user-user pref similarities
                Utils.StartTimer();
                Utils.PrintHeading("Compute item-item similarities (Pref based)");
                DataMatrix PR_userwise_preferences = new DataMatrix(PR_train.GetPositionMatrix());
                Metric.GetPearsonOfColumns(PR_userwise_preferences, MaxCountOfNeighbors, StrongSimilarityThreshold,
                    out ItemSimilaritiesOfPref, out StrongSimilarityIndicatorsByItemPref);
                Utils.StopTimer();

                if (saveLoadedData)
                {
                    Utils.IO<SimilarityData>.SaveObject(UserSimilaritiesOfPref, GetDataFileName("USP"));
                    Utils.IO<SimilarityData>.SaveObject(ItemSimilaritiesOfPref, GetDataFileName("ISP"));
                    Utils.IO<HashSet<Tuple<int,int>>>
                        .SaveObject(StrongSimilarityIndicatorsByItemPref, GetDataFileName("SSIIP"));
                }
                // No StopTimer here: both timers started in this branch were already stopped
                // above, so another stop would have no matching StartTimer.
            }
            #endregion

            ReadyForOrdinal = true;

            return log.ToString();
        }
예제 #13
0
        /// <summary>
        /// Prepares the rating (numerical) data: loads the data set and splits it into
        /// <c>R_train</c>/<c>R_test</c>, derives <c>R_unknown</c> and the relevant items per user,
        /// and then either loads the rating-based similarity data from its data files or computes it.
        /// </summary>
        /// <param name="saveLoadedData">When similarity data has to be computed, also save it to the data files.</param>
        /// <returns>
        /// "Is ready." when the numerical data was already prepared; otherwise the log text of the
        /// data-set stage. Output of the similarity stage is printed by <c>Utils</c> but is not
        /// appended to the returned log.
        /// </returns>
        public string GetReadyForNumerical(bool saveLoadedData = true)
        {
            if (ReadyForNumerical)
            {
                return "Is ready.";
            }

            StringBuilder report = new StringBuilder();
            Utils.StartTimer();

            // Stage 1: load the data set and split it into train and test matrices.
            report.AppendLine(Utils.PrintHeading("Create R_train/R_test sets from " + DataSetFile));
            Utils.LoadMovieLensSplitByCount(DataSetFile, out R_train,
                out R_test, MinCountOfRatings, MaxCountOfRatings, CountOfRatingsForTrain, ShuffleData, Seed);

            // The briefs go to the console first and are then repeated in the returned log.
            Console.WriteLine(R_train.DatasetBrief("Train set"));
            Console.WriteLine(R_test.DatasetBrief("Test set"));
            report.AppendLine(R_train.DatasetBrief("Train set"));
            report.AppendLine(R_test.DatasetBrief("Test set"));

            R_unknown = R_test.IndexesOfNonZeroElements();

            string criteriaText = RelevantItemCriteria.ToString("0.0");
            report.AppendLine(Utils.PrintValue("Relevant item criteria", criteriaText));

            RelevantItemsByUser = ItemRecommendationCore.GetRelevantItemsByUser(R_test, RelevantItemCriteria);
            string meanRelevantText = RelevantItemsByUser.Average(k => k.Value.Count).ToString("0");
            report.AppendLine(Utils.PrintValue("Mean # of relevant items per user", meanRelevantText));
            report.AppendLine(Utils.StopTimer());

            #region Prepare similarity data
            // Stage 2: the cached similarity data is used only when all three files are present.
            bool cachedSimilaritiesExist = File.Exists(GetDataFileName("USR"))
                && File.Exists(GetDataFileName("ISR"))
                && File.Exists(GetDataFileName("SSIIR"));

            if (!cachedSimilaritiesExist)
            {
                // User-user similarities: compute, optionally save, all inside one timer.
                Utils.StartTimer();
                Utils.PrintHeading("Compute user-user similarities (rating based)");
                Metric.GetPearsonOfRows(R_train, MaxCountOfNeighbors, StrongSimilarityThreshold,
                    out UserSimilaritiesOfRating);
                if (saveLoadedData)
                {
                    Utils.IO<SimilarityData>.SaveObject(UserSimilaritiesOfRating, GetDataFileName("USR"));
                }
                Utils.StopTimer();

                // Item-item similarities and their strong-similarity indicators.
                Utils.StartTimer();
                Utils.PrintHeading("Compute item-item similarities (rating based)");
                Metric.GetPearsonOfColumns(R_train, MaxCountOfNeighbors, StrongSimilarityThreshold,
                    out ItemSimilaritiesOfRating, out StrongSimilarityIndicatorsByItemRating);
                if (saveLoadedData)
                {
                    Utils.IO<SimilarityData>.SaveObject(ItemSimilaritiesOfRating, GetDataFileName("ISR"));
                    Utils.IO<HashSet<Tuple<int, int>>>
                        .SaveObject(StrongSimilarityIndicatorsByItemRating, GetDataFileName("SSIIR"));
                }
                Utils.StopTimer();
            }
            else
            {
                // Each cached artifact is loaded under its own timer and heading.
                Utils.StartTimer();
                Utils.PrintHeading("Load user-user similarities (rating based)");
                UserSimilaritiesOfRating = Utils.IO<SimilarityData>.LoadObject(GetDataFileName("USR"));
                Utils.StopTimer();

                Utils.StartTimer();
                Utils.PrintHeading("Load item-item similarities (rating based)");
                ItemSimilaritiesOfRating = Utils.IO<SimilarityData>.LoadObject(GetDataFileName("ISR"));
                Utils.StopTimer();

                Utils.StartTimer();
                Utils.PrintHeading("Load item-item strong similarity indicators (rating based)");
                StrongSimilarityIndicatorsByItemRating = Utils.IO<HashSet<Tuple<int, int>>>.LoadObject(GetDataFileName("SSIIR"));
                Utils.StopTimer();
            }
            #endregion

            ReadyForNumerical = true;

            return report.ToString();
        }
예제 #14
0
        /// <summary>
        /// User-based KNN prediction on item positions derived from preference relations.
        /// The position matrix of <paramref name="PR_train"/> plays the role of the training
        /// rating matrix; each prediction is the user's mean position plus the
        /// similarity-weighted average of the neighbors' mean-offset positions.
        /// </summary>
        /// <param name="PR_train">Training preference relations; must have the same user and item counts as <paramref name="R_unknown"/>.</param>
        /// <param name="R_unknown">Matrix whose per-user entries identify the (user, item) cells to predict.</param>
        /// <param name="K">Maximum number of neighbors that have seen the item to combine per prediction.</param>
        /// <param name="neighborsByUser">Neighbor lists indexed by user; iterated in stored order (presumably sorted by descending similarity — confirm with the producer).</param>
        /// <returns>
        /// A matrix with the dimensions of <paramref name="R_unknown"/>. Cells with no usable
        /// neighbor (none has seen the item, or the similarity weights sum to zero) receive the global mean.
        /// </returns>
        public static DataMatrix PredictRatings(PrefRelations PR_train,
            DataMatrix R_unknown, int K, SimilarityData neighborsByUser)
        {
            Debug.Assert(PR_train.UserCount == R_unknown.UserCount);
            Debug.Assert(PR_train.ItemCount == R_unknown.ItemCount);

            // This matrix stores predictions
            DataMatrix R_predicted = new DataMatrix(R_unknown.UserCount, R_unknown.ItemCount);

            // This can be considered as the R_train in standard UserKNN
            SparseMatrix positionMatrix = PR_train.GetPositionMatrix();
            DataMatrix ratingMatrixFromPositions = new DataMatrix(positionMatrix);

            Vector<double> meanByUser = ratingMatrixFromPositions.GetUserMeans();
            double globalMean = ratingMatrixFromPositions.GetGlobalMean();

            // Predict positions for each test user
            // Appears to be very fast, parallel.foreach is unnecessary
            foreach (Tuple<int, Vector<double>> user in R_unknown.Users)
            {
                int indexOfUser = user.Item1;
                Vector<double> indexesOfUnknownRatings = user.Item2;

                Utils.PrintEpoch("Predicting user/total", indexOfUser, PR_train.UserCount);

                // Note that there are more than K neighbors in the list (sorted by similarity)
                // we will use the top-K neighbors WHO HAVE RATED THE ITEM
                // For example we have 200 top neighbors, and we hope there are
                // K neighbors in the list have rated the item. We can't keep
                // everyone in the neighbor list because there are too many for large data sets
                var topNeighborsOfUser = neighborsByUser[indexOfUser];

                double meanOfUser = meanByUser[indexOfUser];

                // Loop through each position to be predicted
                foreach (Tuple<int, double> unknownRating in indexesOfUnknownRatings.EnumerateIndexed(Zeros.AllowSkip))
                {
                    int indexOfUnknownItem = unknownRating.Item1;

                    // Compute the position of this item for the user
                    // by combining neighbors' positions on this item
                    double weightedSum = 0;
                    double weightSum = 0;
                    int currentTopKCount = 0;
                    foreach (KeyValuePair<int, double> neighbor in topNeighborsOfUser)
                    {
                        int indexOfNeighbor = neighbor.Key;
                        double similarityOfNeighbor = neighbor.Value;
                        double itemPositionOfNeighbor = ratingMatrixFromPositions[indexOfNeighbor, indexOfUnknownItem];

                        // We count only if the neighbor has seen this item before
                        if (itemPositionOfNeighbor != 0)
                        {
                            // Recall that we use a constant to hold position value 0
                            // we revert it back here
                            if (itemPositionOfNeighbor == Config.ZeroInSparseMatrix)
                            {
                                // The condition must be false for the assertion to fire;
                                // Debug.Assert(true, ...) could never report this state.
                                Debug.Assert(false, "By using the PositionShift constant, we should not be in here.");
                                itemPositionOfNeighbor = 0;
                            }
                            weightSum += similarityOfNeighbor;
                            weightedSum += (itemPositionOfNeighbor - meanByUser[indexOfNeighbor]) * similarityOfNeighbor;
                            currentTopKCount++;
                            if (currentTopKCount >= K)
                            {
                                break;
                            }
                        }
                    }

                    // Use the neighborhood estimate only when at least one neighbor has seen the
                    // item AND the weights do not sum to zero; dividing by a zero weightSum would
                    // store NaN or +/-Infinity as the prediction.
                    bool hasUsableNeighbors = currentTopKCount != 0 && weightSum != 0;
                    if (hasUsableNeighbors)
                    {
                        R_predicted[indexOfUser, indexOfUnknownItem] = meanOfUser + weightedSum / weightSum;
                    }
                    else
                    {
                        R_predicted[indexOfUser, indexOfUnknownItem] = globalMean;
                    }
                }
            }
            return R_predicted;
        }
예제 #15
0
파일: Metric.cs 프로젝트: wubinzzu/RecSys
        /// <summary>
        /// Computes pairwise similarities between the rows of <paramref name="R"/>, in parallel
        /// over rows. Each worker collects its results in private buffers and merges them into
        /// the shared outputs under a lock.
        /// </summary>
        /// <param name="R">Matrix whose rows are compared; rows are cast to <c>SparseVector</c> before the metric is applied.</param>
        /// <param name="similarityMetric">
        /// <c>CosineRating</c> or <c>PearsonRating</c>; for any other value no pair is recorded.
        /// </param>
        /// <param name="maxCountOfNeighbors">Passed to the <c>SimilarityData</c> constructor.</param>
        /// <param name="minSimilarityThreshold">A pair is flagged as strongly similar when its similarity is strictly greater than this.</param>
        /// <param name="neighborsByObject">Receives the similarities (both directions per pair) after <c>SortAndRemoveNeighbors</c> has been called on it.</param>
        /// <param name="strongSimilarityIndicators">Receives the flagged pairs, in both directions.</param>
        private static void ComputeSimilarities(Matrix <double> R,
                                                Metric.SimilarityMetric similarityMetric, int maxCountOfNeighbors,
                                                double minSimilarityThreshold, out SimilarityData neighborsByObject,
                                                out HashSet <Tuple <int, int> > strongSimilarityIndicators)
        {
            int rowCount = R.RowCount;
            List <Vector <double> > rowVectors = new List <Vector <double> >(R.EnumerateRows());

            // Rows are assumed to be enumerated in index order; verify the two ends.
            Debug.Assert(rowVectors[0].Sum() == R.Row(0).Sum());
            Debug.Assert(rowVectors[rowVectors.Count - 1].Sum() == R.Row(rowVectors.Count - 1).Sum());

            // Local accumulators: out parameters cannot be captured by the lambda below.
            List <Tuple <int, int> > allStrongPairs = new List <Tuple <int, int> >();
            SimilarityData similarities = new SimilarityData(maxCountOfNeighbors);
            Object gate = new Object();

            Parallel.For(0, rowCount, row =>
            {
                Utils.PrintEpoch("Progress current/total", row, rowCount);

                // Private per-row buffers, so the inner loop never takes the lock.
                Dictionary <Tuple <int, int>, double> rowSimilarities = new Dictionary <Tuple <int, int>, double>();
                List <Tuple <int, int> > rowStrongPairs = new List <Tuple <int, int> >();

                // Only neighbors with a smaller index are visited; the mirrored
                // entry of every pair is written explicitly.
                for (int neighbor = 0; neighbor < row; neighbor++)
                {
                    double similarity;
                    if (similarityMetric == Metric.SimilarityMetric.CosineRating)
                    {
                        // TODO: make a note that it really matters to make it sparse, it computes differently!
                        similarity = Metric.CosineR((SparseVector)rowVectors[row], (SparseVector)rowVectors[neighbor]);
                    }
                    else if (similarityMetric == Metric.SimilarityMetric.PearsonRating)
                    {
                        similarity = Metric.PearsonR((SparseVector)rowVectors[row], (SparseVector)rowVectors[neighbor]);
                    }
                    else
                    {
                        // Metric not handled by this overload: record nothing.
                        continue;
                    }

                    Tuple <int, int> forward  = new Tuple <int, int>(row, neighbor);
                    Tuple <int, int> backward = new Tuple <int, int>(neighbor, row);

                    if (similarity > minSimilarityThreshold)
                    {
                        rowStrongPairs.Add(forward);
                        rowStrongPairs.Add(backward);
                    }
                    rowSimilarities[forward]  = similarity;
                    rowSimilarities[backward] = similarity;
                }

                // Publish this row's results.
                lock (gate)
                {
                    foreach (KeyValuePair <Tuple <int, int>, double> pair in rowSimilarities)
                    {
                        similarities.AddSimilarityData(pair.Key.Item1, pair.Key.Item2, pair.Value);
                    }
                    allStrongPairs.AddRange(rowStrongPairs);
                }
            });

            neighborsByObject = similarities;
            neighborsByObject.SortAndRemoveNeighbors();
            strongSimilarityIndicators = new HashSet <Tuple <int, int> >(allStrongPairs);
        }
예제 #16
0
파일: Metric.cs 프로젝트: wubinzzu/RecSys
        /// <summary>
        /// Cosine similarity over preference relations.
        /// Delegates to <c>ComputeSimilarities</c> with <c>SimilarityMetric.CosinePrefRelations</c>
        /// and ignores the strong-similarity indicator set that call fills in.
        /// </summary>
        /// <param name="PR">Preference relations to compare.</param>
        /// <param name="maxCountOfNeighbors">Forwarded unchanged to <c>ComputeSimilarities</c>.</param>
        /// <param name="strongSimilarityThreshold">Forwarded as the minimum similarity threshold.</param>
        /// <param name="neighborsByObject">Receives the similarity data produced by <c>ComputeSimilarities</c>.</param>
        public static void GetCosineOfPrefRelations(PrefRelations PR, int maxCountOfNeighbors,
                                                    double strongSimilarityThreshold, out SimilarityData neighborsByObject)
        {
            // Needed only to satisfy the callee's out parameter; never read.
            HashSet <Tuple <int, int> > ignoredIndicators;

            ComputeSimilarities(PR,
                                SimilarityMetric.CosinePrefRelations,
                                maxCountOfNeighbors,
                                strongSimilarityThreshold,
                                out neighborsByObject,
                                out ignoredIndicators);
        }