/// <summary>
/// Recommend the most popular (measured by mean rating) items to all users.
/// </summary>
public string RunMostPopular(int topN)
{
    if (!ReadyForNumerical) { GetReadyForNumerical(); }
    StringBuilder log = new StringBuilder();
    log.AppendLine(Utils.PrintHeading("Most popular"));

    // Prediction: every unknown rating is filled with the item's mean rating
    Utils.StartTimer();
    var meanByItem = R_train.GetItemMeans();
    DataMatrix R_predicted = new DataMatrix(R_unknown.UserCount, R_unknown.ItemCount);
    foreach (var element in R_unknown.Matrix.EnumerateIndexed(Zeros.AllowSkip))
    {
        int indexOfUser = element.Item1;
        int indexOfItem = element.Item2;
        R_predicted[indexOfUser, indexOfItem] = meanByItem[indexOfItem];
    }
    var topNItemsByUser = ItemRecommendationCore.GetTopNItemsByUser(R_predicted, topN);
    log.AppendLine(Utils.StopTimer());

    // Top-N evaluation (the NDCG evaluator class is named NCDG in this codebase)
    for (int n = 1; n <= topN; n++)
    {
        log.AppendLine(Utils.PrintValue("NDCG@" + n,
            NCDG.Evaluate(RelevantItemsByUser, topNItemsByUser, n).ToString("0.0000")));
    }
    for (int n = 1; n <= topN; n++)
    {
        log.AppendLine(Utils.PrintValue("MAP@" + n,
            MAP.Evaluate(RelevantItemsByUser, topNItemsByUser, n).ToString("0.0000")));
    }

    return log.ToString();
}
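// For reference, NDCG@n rewards placing relevant items near the top of the
// ranked list, discounting each hit logarithmically by its rank. The sketch
// below is a minimal stand-alone illustration with binary relevance; it is
// NOT the NCDG.Evaluate implementation called above, and the dictionary-based
// input shapes are assumptions made for this example only.
using System;
using System.Collections.Generic;

static class NdcgSketch
{
    // NDCG@n = DCG of the recommended list / DCG of an ideal list
    // that front-loads all relevant items.
    public static double Evaluate(
        Dictionary<int, HashSet<int>> relevantItemsByUser, // user -> ground-truth items (assumed shape)
        Dictionary<int, List<int>> topNItemsByUser,        // user -> ranked recommendations (assumed shape)
        int n)
    {
        double sum = 0;
        int userCount = 0;
        foreach (var pair in topNItemsByUser)
        {
            HashSet<int> relevant;
            if (!relevantItemsByUser.TryGetValue(pair.Key, out relevant) || relevant.Count == 0)
            {
                continue;
            }

            // DCG: a hit at 1-based rank k contributes 1 / log2(k + 1)
            double dcg = 0;
            List<int> ranked = pair.Value;
            for (int k = 0; k < Math.Min(n, ranked.Count); k++)
            {
                if (relevant.Contains(ranked[k])) { dcg += 1.0 / Math.Log(k + 2, 2); }
            }

            // Ideal DCG: all hits (at most n of them) placed at the top
            double idcg = 0;
            for (int k = 0; k < Math.Min(n, relevant.Count); k++)
            {
                idcg += 1.0 / Math.Log(k + 2, 2);
            }

            sum += dcg / idcg;
            userCount++;
        }
        // Average over users who have at least one relevant item
        return userCount == 0 ? 0 : sum / userCount;
    }
}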
public static DataMatrix PredictRatings(PrefRelations PR_train, DataMatrix R_unknown,
    int K, SimilarityData neighborsByUser)
{
    Debug.Assert(PR_train.UserCount == R_unknown.UserCount);
    Debug.Assert(PR_train.ItemCount == R_unknown.ItemCount);

    // This matrix stores the predictions
    DataMatrix R_predicted = new DataMatrix(R_unknown.UserCount, R_unknown.ItemCount);

    // This can be considered as the R_train in standard UserKNN
    SparseMatrix positionMatrix = PR_train.GetPositionMatrix();
    DataMatrix ratingMatrixFromPositions = new DataMatrix(positionMatrix);

    Vector<double> meanByUser = ratingMatrixFromPositions.GetUserMeans();
    Vector<double> meanByItem = ratingMatrixFromPositions.GetItemMeans();
    double globalMean = ratingMatrixFromPositions.GetGlobalMean();

    // Predict positions for each test user.
    // This appears to be fast enough that Parallel.ForEach is unnecessary.
    foreach (Tuple<int, Vector<double>> user in R_unknown.Users)
    {
        int indexOfUser = user.Item1;
        Vector<double> indexesOfUnknownRatings = user.Item2;
        Utils.PrintEpoch("Predicting user/total", indexOfUser, PR_train.UserCount);

        // The list holds more than K neighbors (sorted by similarity);
        // we use the top K neighbors WHO HAVE RATED THE ITEM.
        // For example, out of 200 stored neighbors we hope at least K have
        // rated the item. We cannot keep every user in the neighbor list
        // because there are too many for large data sets.
        var topNeighborsOfUser = neighborsByUser[indexOfUser];
        double meanOfUser = meanByUser[indexOfUser];

        // Loop through each position to be predicted
        foreach (Tuple<int, double> unknownRating in indexesOfUnknownRatings.EnumerateIndexed(Zeros.AllowSkip))
        {
            int indexOfUnknownItem = unknownRating.Item1;

            // Compute the position of this item for the user
            // by combining the neighbors' positions on this item
            double weightedSum = 0;
            double weightSum = 0;
            int currentTopKCount = 0;
            foreach (KeyValuePair<int, double> neighbor in topNeighborsOfUser)
            {
                int indexOfNeighbor = neighbor.Key;
                double similarityOfNeighbor = neighbor.Value;
                double itemPositionOfNeighbor = ratingMatrixFromPositions[indexOfNeighbor, indexOfUnknownItem];

                // Count the neighbor only if it has seen this item before
                if (itemPositionOfNeighbor != 0)
                {
                    // Recall that a constant stands in for position value 0;
                    // revert it back here
                    if (itemPositionOfNeighbor == Config.ZeroInSparseMatrix)
                    {
                        Debug.Assert(false, "By using the PositionShift constant, we should not be in here.");
                        itemPositionOfNeighbor = 0;
                    }
                    weightSum += similarityOfNeighbor;
                    weightedSum += (itemPositionOfNeighbor - meanByUser[indexOfNeighbor]) * similarityOfNeighbor;
                    currentTopKCount++;
                    if (currentTopKCount >= K) { break; }
                }
            }

            if (currentTopKCount != 0)
            {
                // At least one neighbor has seen this item
                // TODO: adding the user mean may improve the performance
                R_predicted[indexOfUser, indexOfUnknownItem] = meanOfUser + weightedSum / weightSum;
            }
            else
            {
                R_predicted[indexOfUser, indexOfUnknownItem] = globalMean;
            }
        }
    }

    return R_predicted;
}
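// The inner loop above is the classic mean-centered user-based KNN predictor
// applied to position values: r_ui = mean_u + sum_v sim(u,v) * (r_vi - mean_v)
// divided by sum_v sim(u,v), taken over the top-K neighbors v who have rated
// item i, with a global-mean fallback. Below is a minimal stand-alone sketch
// of that aggregation using plain arrays instead of the project's
// DataMatrix/SimilarityData types; the array shapes are assumptions.
using System;
using System.Collections.Generic;

static class UserKnnSketch
{
    // Mean-centered weighted average over the top-K neighbors who rated the item.
    // neighbors must be pre-sorted by descending similarity; 0 marks "not rated".
    public static double PredictOne(
        double meanOfUser,
        IEnumerable<KeyValuePair<int, double>> neighbors, // neighbor index -> similarity
        double[,] ratings,                                // neighbor ratings, [user, item]
        double[] meanByNeighbor,
        int itemIndex,
        int K,
        double globalMean)
    {
        double weightedSum = 0, weightSum = 0;
        int used = 0;
        foreach (var neighbor in neighbors)
        {
            double r = ratings[neighbor.Key, itemIndex];
            if (r == 0) { continue; }               // this neighbor has not rated the item
            weightedSum += neighbor.Value * (r - meanByNeighbor[neighbor.Key]);
            weightSum += neighbor.Value;
            if (++used >= K) { break; }
        }
        // Fall back to the global mean when no neighbor has rated the item
        return used == 0 ? globalMean : meanOfUser + weightedSum / weightSum;
    }
}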
public void PredictRatings(DataMatrix R_train, DataMatrix R_unknown,
    HashSet<Tuple<int, int>> strongSimilarityIndicators,
    Dictionary<Tuple<int, int>, List<double>> OMFDistributions,
    double regularization, double learnRate, int maxEpoch, int ratingLevels,
    out DataMatrix R_predicted_expectations, out DataMatrix R_predicted_mostlikely)
{
    /************************************************************
     * Parameterization and Initialization
    ************************************************************/
    #region Parameterization and Initialization
    int userCount = R_train.UserCount;
    int itemCount = R_train.ItemCount;
    meanByUser = R_train.GetUserMeans(); // Mean value of each user
    meanByItem = R_train.GetItemMeans(); // Mean value of each item
    this.R_train = R_train;
    this.OMFDistributions = OMFDistributions;
    R_predicted_expectations = new DataMatrix(R_unknown.UserCount, R_unknown.ItemCount);
    R_predicted_mostlikely = new DataMatrix(R_unknown.UserCount, R_unknown.ItemCount);

    // Initialize the weights
    this.strongSimilarityIndicators = strongSimilarityIndicators;
    featureWeightByItemItem = new Dictionary<Tuple<int, int>, double>(strongSimilarityIndicators.Count);

    // Initialize all strong item-item features with small random weights
    Random rnd = new Random(Config.Seed);
    foreach (var strongSimilarityPair in strongSimilarityIndicators)
    {
        double randomWeight = rnd.NextDouble() * 0.01;
        featureWeightByItemItem[strongSimilarityPair] = randomWeight;
    }

    // Cache which items have been rated by each user;
    // this is reused in every feature update
    Dictionary<int, List<int>> itemsByUser = R_train.GetItemsByUser();

    // TODO: we actually store more features than necessary,
    // because some item pairs may not be co-rated by any user
    Utils.PrintValue("# of item-item features", (featureWeightByItemItem.Count / 2).ToString());
    #endregion

    /************************************************************
     * Learn weights from training data R_train
    ************************************************************/
    #region Learn weights from training data R_train
    double likelihood_prev = -double.MaxValue;
    for (int epoch = 0; epoch < maxEpoch; epoch++)
    {
        /************************************************************
         * Apply Eq. 23 and 24
        ************************************************************/
        #region Apply Eq. 23 and 24

        // Unlike NMF, which uses each rating as one training input,
        // here the set of ratings by each user is the input for each pass
        foreach (var user in R_train.Users)
        {
            int indexOfUser = user.Item1;
            Vector<double> ratingsOfUser = user.Item2;
            Debug.Assert(ratingsOfUser.Storage.IsDense == false, "The user ratings should be stored in a sparse vector.");

            List<int> itemsOfUser = itemsByUser[indexOfUser]; // The items rated by this user

            // Select one rating r_ui from the user's ratings R_u and combine it
            // with each other rating r_uj in R_u, so that we can refine the
            // weight of the i-j item pair co-rated by this user
            foreach (var item_i in ratingsOfUser.EnumerateIndexed(Zeros.AllowSkip))
            {
                int indexOfItem_i = item_i.Item1;
                int r_ui = (int)R_train[indexOfUser, indexOfItem_i]; // R_train should contain integers only
                double meanOfItem_i = meanByItem[indexOfItem_i];

                // Find the neighbors of item_i, i.e. "\vec{r}_u \ r_ui" in Eq. 21
                List<int> neighborsOfItem_i = new List<int>(itemsOfUser.Count);

                // Keep only strong neighbors; an item is not a neighbor of itself
                foreach (int indexOfNeighbor in itemsOfUser)
                {
                    if (strongSimilarityIndicators.Contains(new Tuple<int, int>(indexOfItem_i, indexOfNeighbor))
                        && indexOfNeighbor != indexOfItem_i)
                    {
                        neighborsOfItem_i.Add(indexOfNeighbor);
                    }
                }

                // Partition function Z_ui
                double Z_ui = 0;
                for (int targetRating = 1; targetRating <= ratingLevels; targetRating++)
                {
                    Z_ui += OMFDistributions[new Tuple<int, int>(indexOfUser, indexOfItem_i)][targetRating - 1]
                        * ComputePotential(targetRating, indexOfUser, indexOfItem_i, neighborsOfItem_i);
                }

                // Local likelihood of each rating level for item_i. These can be
                // computed once here instead of inside the i-j loop below, because
                // the weights of item_i's features are not updated again by this
                // user within the epoch, so the values would not change.
                List<double> localLikelihoods = new List<double>(ratingLevels);
                for (int targetRating = 1; targetRating <= ratingLevels; targetRating++)
                {
                    localLikelihoods.Add(ComputeLocalLikelihood(targetRating, indexOfUser, indexOfItem_i, neighborsOfItem_i, Z_ui));
                }

                // For each neighbor item j strongly correlated with item_i,
                // update the weight w_ij
                foreach (int indexOfItem_j in neighborsOfItem_i)
                {
                    // i-j and j-i correspond to the same feature, so train only
                    // when i < j to avoid updating each weight twice
                    if (indexOfItem_i > indexOfItem_j) { continue; }

                    // Weak features were already filtered out when building
                    // neighborsOfItem_i, so no similarity check is needed here

                    // Compute the gradient, Eq. 24
                    #region Compute gradients
                    double r_uj = R_train[indexOfUser, indexOfItem_j];
                    double meanOfItem_j = meanByItem[indexOfItem_j];

                    // The first term in Eq. 24
                    double gradientFirstTerm = ComputeCorrelationFeature(r_ui, meanOfItem_i, r_uj, meanOfItem_j);

                    // The second term in Eq. 24: the expectation of the
                    // correlation feature under the current local likelihoods
                    double gradientSecondTerm = 0.0;
                    for (int targetRating = 1; targetRating <= ratingLevels; targetRating++)
                    {
                        double localLikelihoodOfTargetRating = localLikelihoods[targetRating - 1];
                        double correlationFeature = ComputeCorrelationFeature(targetRating, meanOfItem_i, r_uj, meanOfItem_j);
                        gradientSecondTerm += localLikelihoodOfTargetRating * correlationFeature;
                    }

                    // Merge the two terms
                    double gradient = gradientFirstTerm - gradientSecondTerm;
                    #endregion

                    #region Update weights
                    // Apply the regularization penalty from Eq. 23/24
                    double weight = featureWeightByItemItem[new Tuple<int, int>(indexOfItem_i, indexOfItem_j)];
                    gradient -= regularization * weight;
                    double step = learnRate * gradient; // Apply the learning rate

                    // Update the weight; i-j and j-i mirror the same feature
                    featureWeightByItemItem[new Tuple<int, int>(indexOfItem_i, indexOfItem_j)] += step;
                    featureWeightByItemItem[new Tuple<int, int>(indexOfItem_j, indexOfItem_i)] += step;
                    #endregion
                }
            }
        }
        #endregion

        /************************************************************
         * Compute the regularized sum of log local likelihoods, Eq. 20,
         * and check whether it has converged
        ************************************************************/
        #region Compute sum of regularized log likelihood to see if it converges
        if (epoch == 0 || epoch == maxEpoch - 1 || epoch % (int)Math.Ceiling(maxEpoch * 0.1) == 4)
        {
            double likelihood_curr = 0;
            double sumOfLogLL = 0.0; // Sum of log local likelihoods, the first term in Eq. 20

            // Compute user by user so that Z_ui can be reused
            foreach (var user in R_train.Users)
            {
                int indexOfUser = user.Item1;
                Vector<double> ratingsOfUser = user.Item2;
                Debug.Assert(ratingsOfUser.Storage.IsDense == false, "The user ratings should be stored in a sparse vector.");

                List<int> itemsOfUser = itemsByUser[indexOfUser]; // The items rated by this user
                double logLLOfUser = 0.0; // The sum of Eq. 21 over all ratings of the current user

                foreach (var item_i in ratingsOfUser.EnumerateIndexed(Zeros.AllowSkip))
                {
                    int indexOfItem_i = item_i.Item1;
                    int r_ui = (int)R_train[indexOfUser, indexOfItem_i]; // R_train should contain integers only
                    double meanOfItem_i = meanByItem[indexOfItem_i];

                    // Find the neighbors of item_i, i.e. "\vec{r}_u \ r_ui" in Eq. 21,
                    // keeping only strong neighbors; an item is not a neighbor of itself
                    List<int> neighborsOfItem_i = new List<int>(itemsOfUser.Count);
                    foreach (int indexOfNeighbor in itemsOfUser)
                    {
                        if (strongSimilarityIndicators.Contains(new Tuple<int, int>(indexOfItem_i, indexOfNeighbor))
                            && indexOfNeighbor != indexOfItem_i)
                        {
                            neighborsOfItem_i.Add(indexOfNeighbor);
                        }
                    }

                    // Partition function Z_ui
                    double Z_ui = 0;
                    for (int targetRating = 1; targetRating <= ratingLevels; targetRating++)
                    {
                        Z_ui += OMFDistributions[new Tuple<int, int>(indexOfUser, indexOfItem_i)][targetRating - 1]
                            * ComputePotential(targetRating, indexOfUser, indexOfItem_i, neighborsOfItem_i);
                    }

                    // Eq. 21 for the current item i, that is, for r_ui
                    double localLikelihoodOfRating_ui = ComputeLocalLikelihood(r_ui, indexOfUser, indexOfItem_i, neighborsOfItem_i, Z_ui);
                    logLLOfUser += Math.Log(localLikelihoodOfRating_ui);
                }
                sumOfLogLL += logLLOfUser;
            }
            // Eq. 20: regularize the sum of log local likelihoods
            double regularizedSumOfLogLL = sumOfLogLL
                - regularization * featureWeightByItemItem.Sum(x => x.Value * x.Value);
            likelihood_curr = regularizedSumOfLogLL;
            Utils.PrintEpoch("Epoch", epoch, maxEpoch, "Reg sum of log LL", regularizedSumOfLogLL.ToString("0.000"));

            double improvement = Math.Abs(likelihood_prev) - Math.Abs(likelihood_curr);
            if (improvement < 0.001)
            {
                Console.WriteLine("Improvement less than 0.001, learning stopped.");
                break;
            }
            likelihood_prev = likelihood_curr;
        }
        #endregion
    }
    #endregion

    /************************************************************
     * Make predictions
    ************************************************************/
    #region Make predictions
    foreach (var user in R_unknown.Users)
    {
        int indexOfUser = user.Item1;
        Vector<double> unknownRatingsOfUser = user.Item2;
        List<int> itemsOfUser = itemsByUser[indexOfUser];

        foreach (var unknownRating in unknownRatingsOfUser.EnumerateIndexed(Zeros.AllowSkip))
        {
            int indexOfItem = unknownRating.Item1;

            // Find the strong neighbors of the item;
            // an item is not a neighbor of itself
            List<int> neighborsOfItem = new List<int>(itemsOfUser.Count);
            foreach (int indexOfNeighbor in itemsOfUser)
            {
                if (strongSimilarityIndicators.Contains(new Tuple<int, int>(indexOfItem, indexOfNeighbor))
                    && indexOfNeighbor != indexOfItem)
                {
                    neighborsOfItem.Add(indexOfNeighbor);
                }
            }

            // Partition function Z_ui
            double Z_ui = 0;
            for (int targetRating = 1; targetRating <= ratingLevels; targetRating++)
            {
                Z_ui += OMFDistributions[new Tuple<int, int>(indexOfUser, indexOfItem)][targetRating - 1]
                    * ComputePotential(targetRating, indexOfUser, indexOfItem, neighborsOfItem);
            }

            double sumOfLikelihood = 0.0;
            double currentMaxLikelihood = 0.0;
            double mostlikelyRating = 0.0;
            double expectationRating = 0.0;
            for (int targetRating = 1; targetRating <= ratingLevels; targetRating++)
            {
                double likelihoodOfTargetRating = ComputeLocalLikelihood(targetRating, indexOfUser, indexOfItem, neighborsOfItem, Z_ui);

                // Track the most likely rating (used for MAE)
                if (likelihoodOfTargetRating > currentMaxLikelihood)
                {
                    mostlikelyRating = targetRating;
                    currentMaxLikelihood = likelihoodOfTargetRating;
                }

                // Accumulate the expectation (used for RMSE)
                expectationRating += targetRating * likelihoodOfTargetRating;
                sumOfLikelihood += likelihoodOfTargetRating;
            }

            // The likelihoods should sum to 1, up to floating-point precision
            Debug.Assert(Math.Abs(sumOfLikelihood - 1.0) < 0.0001);

            R_predicted_expectations[indexOfUser, indexOfItem] = expectationRating;
            R_predicted_mostlikely[indexOfUser, indexOfItem] = mostlikelyRating;
        }
    }
    #endregion
}
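// The helpers ComputePotential, ComputeLocalLikelihood, and
// ComputeCorrelationFeature are defined elsewhere in this class, and their
// exact Eq. 20/21 definitions come from the accompanying paper. As a reading
// aid only, the sketch below shows one common log-linear form that is
// consistent with how they are called above; every formula here is an
// assumption, not the project's actual implementation.
using System;
using System.Collections.Generic;

static class MrfSketch
{
    // Mean-centered product feature: one plausible reading of
    // ComputeCorrelationFeature(r_ui, mean_i, r_uj, mean_j)
    public static double CorrelationFeature(double r_i, double mean_i, double r_j, double mean_j)
    {
        return (r_i - mean_i) * (r_j - mean_j);
    }

    // Potential of rating level r for (user, item_i):
    // exp( sum over strong neighbors j of w_ij * f(r, r_uj) )
    public static double Potential(
        int targetRating, int user, int item_i, List<int> neighbors,
        double[,] ratings,                                 // [user, item], assumed shape
        double[] meanByItem,
        Dictionary<Tuple<int, int>, double> weights)       // (i, j) -> w_ij
    {
        double sum = 0;
        foreach (int j in neighbors)
        {
            sum += weights[new Tuple<int, int>(item_i, j)]
                * CorrelationFeature(targetRating, meanByItem[item_i], ratings[user, j], meanByItem[j]);
        }
        return Math.Exp(sum);
    }

    // Eq. 21-style local likelihood: the OMF prior for this rating level,
    // times the MRF potential, normalized by the partition function Z_ui.
    // Since Z_ui sums prior * potential over all levels, the likelihoods
    // sum to 1, matching the Debug.Assert in the prediction loop above.
    public static double LocalLikelihood(double omfPrior, double potential, double Z_ui)
    {
        return omfPrior * potential / Z_ui;
    }
}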