/// <summary>
/// Perform one stochastic gradient descent pass over the given rating indices,
/// updating the user/item biases and the group terms of socially connected users.
/// </summary>
/// <param name="rating_indices">indices of the ratings to iterate over</param>
/// <param name="update_user">true if user parameters are to be updated</param>
/// <param name="update_item">true if item parameters are to be updated</param>
        protected override void Iterate(IList <int> rating_indices, bool update_user, bool update_item)
        {
            SetupLoss();

            foreach (int index in rating_indices)
            {
                int user_id = ratings.Users[index];
                int item_id = ratings.Items[index];

                // squash the raw score through a sigmoid and map it onto the rating scale
                double raw_score = Predict(user_id, item_id, false);
                double sigmoid   = 1 / (1 + Math.Exp(-raw_score));
                double predicted = min_rating + sigmoid * rating_range_size;
                double error     = ratings[index] - predicted;

                // per-entity regularization weight, optionally scaled down for frequent raters/items
                float user_reg_weight = FrequencyRegularization ? (float)(RegU / Math.Sqrt(ratings.CountByUser[user_id])) : RegU;
                float item_reg_weight = FrequencyRegularization ? (float)(RegI / Math.Sqrt(ratings.CountByItem[item_id])) : RegI;

                // gradient step on the biases
                if (update_user)
                    user_bias[user_id] += BiasLearnRate * current_learnrate * (float)(error - BiasReg * user_reg_weight * user_bias[user_id]);
                if (update_item)
                    item_bias[item_id] += BiasLearnRate * current_learnrate * (float)(error - BiasReg * item_reg_weight * item_bias[item_id]);

                // gradient step on the group terms of the user's connections
                if (user_id < user_connections.NumberOfRows)
                    foreach (int connection_id in user_connections.GetEntriesByRow(user_id))
                        group[connection_id] += BiasLearnRate * current_learnrate * (float)(error - BiasReg * Regularization * group[connection_id]);
            }

            UpdateLearnRate();
        }
        /// <summary>
        /// Estimate per-user, naive-Bayes-style probabilities from the rating data:
        /// a smoothed prior over rating levels (classes) and, for each item attribute,
        /// a smoothed attribute-given-class probability.
        /// </summary>
        /// <param name="users">the IDs of the users to (re)compute probabilities for</param>
        void ComputeProbabilities(IList <int> users)
        {
            foreach (int user_id in users)
            {
                // initialize counter variables
                // NOTE(review): the SparseMatrix is constructed as (num_levels, num_attributes)
                // but indexed below as [attribute_id, level_id] -- dimensions look transposed;
                // confirm whether SparseMatrix auto-grows or this is a latent bug
                var user_class_counts = new int[ratings.Scale.Levels.Count];
                var user_attribute_given_class_counts = new SparseMatrix <int>(ratings.Scale.Levels.Count, ItemAttributes.NumberOfColumns);

                // count rating levels and attribute/level co-occurrences over this user's ratings
                foreach (int index in ratings.ByUser[user_id])
                {
                    int item_id  = ratings.Items[index];
                    int level_id = ratings.Scale.LevelID[ratings[index]];

                    user_class_counts[level_id]++;
                    foreach (int attribute_id in item_attributes.GetEntriesByRow(item_id))
                    {
                        user_attribute_given_class_counts[attribute_id, level_id]++;
                    }
                }

                // compute probabilities with additive smoothing
                float denominator = user_class_counts.Sum() + ClassSmoothing;

                foreach (int level_id in ratings.Scale.LevelID.Values)
                {
                    user_class_probabilities[user_id, level_id] = (user_class_counts[level_id] + ClassSmoothing) / denominator;

                    // TODO sparsify?
                    // NOTE(review): the denominator is NumItemAttributes + AttributeSmoothing
                    // rather than the per-class count plus smoothing mass -- confirm this is
                    // the intended smoothing scheme
                    for (int attribute_id = 0; attribute_id < NumItemAttributes; attribute_id++)
                    {
                        user_attribute_given_class_probabilities[user_id][attribute_id, level_id]
                            = (user_attribute_given_class_counts[attribute_id, level_id] + AttributeSmoothing) / (NumItemAttributes + AttributeSmoothing);
                    }
                }
            }
        }
// Example #3
        /// <summary>Compute the fit of the item mapping</summary>
        /// <returns>
        /// an array of doubles containing the RMSE on the training data for each latent factor
        /// </returns>
        protected double[] ComputeItemMappingFit()
        {
            double rmse    = 0;
            double penalty = 0;
            var    rmse_and_penalty_per_factor = new double[num_factors];

            // the per-factor penalty does not depend on the item, so compute each column
            // norm once instead of once per item (the original recomputed it O(items) times)
            // NOTE(review): unlike ComputeMappingFit(), the norm is not scaled by a
            // regularization constant here -- confirm this is intended
            var factor_penalty = new double[num_factors];
            for (int f = 0; f < num_factors; f++)
                factor_penalty[f] = item_attribute_to_factor.GetColumn(f).EuclideanNorm();

            int num_items = 0;

            for (int item_id = 0; item_id <= MaxItemID; item_id++)
            {
                // skip items not present in the feedback data
                if (item_id > Feedback.MaxItemID)
                {
                    continue;
                }
                var item_users = Feedback.ItemMatrix.GetEntriesByRow(item_id);
                var item_attrs = item_attributes.GetEntriesByRow(item_id);
                // only items with both users and attributes contribute to the fit
                if (item_users.Count == 0 || item_attrs.Count == 0)                 // TODO why ignore items w/o attributes?
                {
                    continue;
                }

                num_items++;

                float[] est_factors = MapItemToLatentFactorSpace(item_id);
                for (int f = 0; f < num_factors; f++)
                {
                    double error = Math.Pow(est_factors[f] - item_factors[item_id, f], 2);
                    rmse    += error;
                    penalty += factor_penalty[f];
                    rmse_and_penalty_per_factor[f] += error + factor_penalty[f];
                }
            }

            // report per-factor and overall averages on stderr
            // (if num_items is 0 the averages are NaN, as in the original code)
            for (int i = 0; i < num_factors; i++)
            {
                rmse_and_penalty_per_factor[i] = rmse_and_penalty_per_factor[i] / num_items;
                Console.Error.Write("{0:0.####} ", rmse_and_penalty_per_factor[i]);
            }
            rmse    = rmse / (num_factors * num_items);
            penalty = penalty / (num_factors * num_items);
            Console.Error.WriteLine(" > {0:0.####} ({1:0.####})", rmse, penalty);

            return rmse_and_penalty_per_factor;
        }
        /// <summary>
        /// Compute the fit of the mapping
        /// </summary>
        /// <returns>
        /// an array of doubles containing the RMSE on the training data for each latent factor
        /// </returns>
        protected double[] ComputeMappingFit()
        {
            double rmse    = 0;
            double penalty = 0;
            var    rmse_and_penalty_per_factor = new double[num_factors];

            // the per-factor regularization penalty does not depend on the item,
            // so compute it once instead of once per item (the original recomputed
            // the column norm O(items) times)
            var factor_penalty = new double[num_factors];
            for (int f = 0; f < num_factors; f++)
                factor_penalty[f] = reg_mapping * VectorExtensions.EuclideanNorm(attribute_to_factor.GetColumn(f));

            int num_items = 0;

            for (int i = 0; i < MaxItemID + 1; i++)
            {
                var item_users = Feedback.ItemMatrix.GetEntriesByRow(i);
                var item_attrs = item_attributes.GetEntriesByRow(i);
                // only items with both users and attributes contribute to the fit
                if (item_users.Count == 0 || item_attrs.Count == 0)
                {
                    continue;
                }

                num_items++;

                float[] est_factors = MapToLatentFactorSpace(i);
                for (int j = 0; j < num_factors; j++)
                {
                    double error = Math.Pow(est_factors[j] - item_factors[i, j], 2);
                    rmse    += error;
                    penalty += factor_penalty[j];
                    rmse_and_penalty_per_factor[j] += error + factor_penalty[j];
                }
            }

            // report per-factor and overall averages on stderr
            // (if num_items is 0 the averages are NaN, as in the original code)
            for (int i = 0; i < num_factors; i++)
            {
                rmse_and_penalty_per_factor[i] = rmse_and_penalty_per_factor[i] / num_items;
                Console.Error.Write("{0:0.####} ", rmse_and_penalty_per_factor[i]);
            }
            rmse    = rmse / (num_factors * num_items);
            penalty = penalty / (num_factors * num_items);
            Console.Error.WriteLine(" > {0:0.####} ({1:0.####})", rmse, penalty);

            return rmse_and_penalty_per_factor;
        }
// Example #5
        /// <summary>
        /// Perform one stochastic gradient descent pass over the given rating indices,
        /// updating biases, user- and item-attribute weights, the demographic-specific
        /// interaction weights h, and the latent factors.
        /// </summary>
        /// <param name="rating_indices">indices of the ratings to iterate over</param>
        /// <param name="update_user">true if user parameters are to be updated</param>
        /// <param name="update_item">true if item parameters are to be updated</param>
        protected override void Iterate(IList <int> rating_indices, bool update_user, bool update_item)
        {
            float reg = Regularization;             // to limit property accesses

            foreach (int index in rating_indices)
            {
                int u = ratings.Users[index];
                int i = ratings.Items[index];

                // ----- compute the prediction -----
                double prediction = global_bias + user_bias[u] + item_bias[i];

                // average weight of the user's main attributes
                if (u < user_attributes.NumberOfRows)
                {
                    IList <int> attribute_list = user_attributes.GetEntriesByRow(u);
                    if (attribute_list.Count > 0)
                    {
                        double sum = 0;
                        double second_norm_denominator = attribute_list.Count;
                        foreach (int attribute_id in attribute_list)
                        {
                            sum += main_demo[attribute_id];
                        }
                        prediction += sum / second_norm_denominator;
                    }
                }

                // average weight of each additional user attribute group
                for (int d = 0; d < additional_user_attributes.Count; d++)
                {
                    if (u < additional_user_attributes[d].NumberOfRows)
                    {
                        IList <int> attribute_list = additional_user_attributes[d].GetEntriesByRow(u);
                        if (attribute_list.Count > 0)
                        {
                            double sum = 0;
                            double second_norm_denominator = attribute_list.Count;
                            foreach (int attribute_id in attribute_list)
                            {
                                sum += second_demo[d][attribute_id];
                            }
                            prediction += sum / second_norm_denominator;
                        }
                    }
                }

                // average weight of the item's main attributes
                if (i < ItemAttributes.NumberOfRows)
                {
                    IList <int> attribute_list = ItemAttributes.GetEntriesByRow(i);
                    if (attribute_list.Count > 0)
                    {
                        double sum = 0;
                        double second_norm_denominator = attribute_list.Count;
                        foreach (int attribute_id in attribute_list)
                        {
                            sum += main_metadata[attribute_id];
                        }
                        prediction += sum / second_norm_denominator;
                    }
                }

                // average weight of each additional item attribute group
                for (int g = 0; g < AdditionalItemAttributes.Count; g++)
                {
                    if (i < AdditionalItemAttributes[g].NumberOfRows)
                    {
                        IList <int> attribute_list = AdditionalItemAttributes[g].GetEntriesByRow(i);
                        if (attribute_list.Count > 0)
                        {
                            double sum = 0;
                            double second_norm_denominator = attribute_list.Count;
                            foreach (int attribute_id in attribute_list)
                            {
                                sum += second_metadata[g][attribute_id];
                            }
                            prediction += sum / second_norm_denominator;
                        }
                    }
                }

                // demographic-specific user-attribute x item-attribute interaction terms
                if (u < UserAttributes.NumberOfRows && i < ItemAttributes.NumberOfRows)
                {
                    IList <int> item_attribute_list   = ItemAttributes.GetEntriesByRow(i);
                    double      item_norm_denominator = item_attribute_list.Count;

                    IList <int> user_attribute_list   = UserAttributes.GetEntriesByRow(u);
                    float       user_norm_denominator = user_attribute_list.Count;

                    float demo_spec = 0;
                    float sum       = 0;
                    foreach (int u_att in user_attribute_list)
                    {
                        foreach (int i_att in item_attribute_list)
                        {
                            sum += h[0][u_att, i_att];
                        }
                    }
                    demo_spec += sum / user_norm_denominator;

                    for (int d = 0; d < AdditionalUserAttributes.Count; d++)
                    {
                        user_attribute_list   = AdditionalUserAttributes[d].GetEntriesByRow(u);
                        user_norm_denominator = user_attribute_list.Count;
                        sum = 0;
                        foreach (int u_att in user_attribute_list)
                        {
                            foreach (int i_att in item_attribute_list)
                            {
                                sum += h[d + 1][u_att, i_att];
                            }
                        }
                        demo_spec += sum / user_norm_denominator;
                    }

                    prediction += demo_spec / item_norm_denominator;
                }

                prediction += DataType.MatrixExtensions.RowScalarProduct(user_factors, u, item_factors, i);

                double err = ratings[index] - prediction;

                // per-entity regularization weight, optionally scaled by rating frequency
                float user_reg_weight = FrequencyRegularization ? (float)(reg / Math.Sqrt(ratings.CountByUser[u])) : reg;
                float item_reg_weight = FrequencyRegularization ? (float)(reg / Math.Sqrt(ratings.CountByItem[i])) : reg;

                // adjust biases
                if (update_user)
                {
                    user_bias[u] += BiasLearnRate * current_learnrate * ((float)err - BiasReg * user_reg_weight * user_bias[u]);
                }
                if (update_item)
                {
                    item_bias[i] += BiasLearnRate * current_learnrate * ((float)err - BiasReg * item_reg_weight * item_bias[i]);
                }

                // adjust user attributes
                if (u < user_attributes.NumberOfRows)
                {
                    IList <int> attribute_list = user_attributes.GetEntriesByRow(u);
                    if (attribute_list.Count > 0)
                    {
                        double second_norm_denominator = attribute_list.Count;
                        double second_norm_error       = err / second_norm_denominator;

                        foreach (int attribute_id in attribute_list)
                        {
                            main_demo[attribute_id] += BiasLearnRate * current_learnrate * ((float)second_norm_error - BiasReg * reg * main_demo[attribute_id]);
                        }
                    }
                }

                for (int d = 0; d < additional_user_attributes.Count; d++)
                {
                    if (u < additional_user_attributes[d].NumberOfRows)
                    {
                        IList <int> attribute_list = additional_user_attributes[d].GetEntriesByRow(u);
                        if (attribute_list.Count > 0)
                        {
                            double second_norm_denominator = attribute_list.Count;
                            double second_norm_error       = err / second_norm_denominator;

                            foreach (int attribute_id in attribute_list)
                            {
                                second_demo[d][attribute_id] += BiasLearnRate * current_learnrate * ((float)second_norm_error - BiasReg * reg * second_demo[d][attribute_id]);
                            }
                        }
                    }
                }

                // adjust item attributes
                if (i < ItemAttributes.NumberOfRows)
                {
                    IList <int> attribute_list = ItemAttributes.GetEntriesByRow(i);
                    if (attribute_list.Count > 0)
                    {
                        foreach (int attribute_id in attribute_list)
                        {
                            main_metadata[attribute_id] += BiasLearnRate * current_learnrate * ((float)err - BiasReg * Regularization * main_metadata[attribute_id]);
                        }
                    }
                }

                for (int g = 0; g < AdditionalItemAttributes.Count; g++)
                {
                    if (i < AdditionalItemAttributes[g].NumberOfRows)
                    {
                        IList <int> attribute_list = AdditionalItemAttributes[g].GetEntriesByRow(i);
                        if (attribute_list.Count > 0)
                        {
                            foreach (int attribute_id in attribute_list)
                            {
                                second_metadata[g][attribute_id] += BiasLearnRate * current_learnrate * ((float)err - BiasReg * Regularization * second_metadata[g][attribute_id]);
                            }
                        }
                    }
                }

                // adjust demo specific attributes
                if (u < UserAttributes.NumberOfRows && i < ItemAttributes.NumberOfRows)
                {
                    IList <int> item_attribute_list   = ItemAttributes.GetEntriesByRow(i);
                    float       item_norm_denominator = item_attribute_list.Count;

                    IList <int> user_attribute_list = UserAttributes.GetEntriesByRow(u);
                    // BUGFIX: the original computed `1 / Count` with integer division,
                    // which is 0 for every user with more than one attribute and silently
                    // dropped the error term from the h updates; use float division as the
                    // prediction phase above does
                    float user_norm = 1.0f / user_attribute_list.Count;

                    float norm_error = (float)err / item_norm_denominator;

                    foreach (int u_att in user_attribute_list)
                    {
                        foreach (int i_att in item_attribute_list)
                        {
                            h[0][u_att, i_att] += BiasLearnRate * current_learnrate * (norm_error * user_norm - BiasReg * reg * h[0][u_att, i_att]);
                        }
                    }

                    for (int d = 0; d < AdditionalUserAttributes.Count; d++)
                    {
                        user_attribute_list = AdditionalUserAttributes[d].GetEntriesByRow(u);
                        // BUGFIX: same integer-division fix as above
                        user_norm           = 1.0f / user_attribute_list.Count;

                        foreach (int u_att in user_attribute_list)
                        {
                            foreach (int i_att in item_attribute_list)
                            {
                                h[d + 1][u_att, i_att] += BiasLearnRate * current_learnrate * (norm_error * user_norm - BiasReg * reg * h[d + 1][u_att, i_att]);
                            }
                        }
                    }
                }

                // adjust latent factors
                for (int f = 0; f < NumFactors; f++)
                {
                    double u_f = user_factors[u, f];
                    double i_f = item_factors[i, f];

                    if (update_user)
                    {
                        double delta_u = err * i_f - user_reg_weight * u_f;
                        user_factors.Inc(u, f, current_learnrate * delta_u);
                    }
                    if (update_item)
                    {
                        double delta_i = err * u_f - item_reg_weight * i_f;
                        item_factors.Inc(i, f, current_learnrate * delta_i);
                    }
                }
            }

            UpdateLearnRate();
        }
// Example #6
        /// <summary>
        /// Solve the regularized least-squares problem for one row u of W, given the
        /// fixed factor matrix H and the precomputed Gram matrix HH.
        /// </summary>
        /// <param name="u">the index of the row of W to optimize</param>
        /// <param name="data">the boolean feedback matrix</param>
        /// <param name="W">the factor matrix being optimized</param>
        /// <param name="H">the fixed factor matrix</param>
        /// <param name="HH">the precomputed product H^T H</param>
        private void Optimize(int u, IBooleanMatrix data, Matrix <float> W, Matrix <float> H, Matrix <double> HH)
        {
            var user_items = data.GetEntriesByRow(u);

            // HC_minus_IH is symmetric: fill the upper triangle and mirror it.
            // Built in O(f^2 |S_u|).
            var HC_minus_IH = new Matrix <double>(num_factors, num_factors);
            for (int j = 0; j < num_factors; j++)
            {
                for (int k = j; k < num_factors; k++)
                {
                    double acc = 0;
                    foreach (int item in user_items)
                        acc += H[item, j] * H[item, k];
                    HC_minus_IH[j, k] = acc * Alpha;
                    HC_minus_IH[k, j] = acc * Alpha;
                }
            }

            // HCp, built in O(f |S_u|)
            var HCp = new double[num_factors];
            for (int j = 0; j < num_factors; j++)
            {
                double acc = 0;
                foreach (int item in user_items)
                    acc += H[item, j];
                HCp[j] = acc * (1 + Alpha);
            }

            // m = HH + HC_minus_IH + reg * I; both m and its inverse are symmetric
            var m = new DenseMatrix(num_factors, num_factors);
            for (int j = 0; j < num_factors; j++)
            {
                for (int k = j; k < num_factors; k++)
                {
                    double entry = HH[j, k] + HC_minus_IH[j, k];
                    if (j == k)
                        entry += Regularization;
                    m[j, k] = entry;
                    m[k, j] = entry;
                }
            }
            var m_inv = m.Inverse();

            // write back the optimal row of W
            for (int j = 0; j < num_factors; j++)
            {
                double acc = 0;
                for (int k = 0; k < num_factors; k++)
                    acc += m_inv[j, k] * HCp[k];
                W[u, j] = (float)acc;
            }
        }
// Example #7
        /// <summary>Optimizes the specified data</summary>
        /// <param name="data">data</param>
        /// <param name="inverse_data">the transposed data, used for the KDD Cup specific weighting</param>
        /// <param name="W">W, the factor matrix optimized row by row</param>
        /// <param name="H">H, the fixed factor matrix</param>
        void Optimize(IBooleanMatrix data, IBooleanMatrix inverse_data, Matrix <double> W, Matrix <double> H)
        {
            var HH          = new Matrix <double>(num_factors, num_factors);
            var HC_minus_IH = new Matrix <double>(num_factors, num_factors);
            var HCp         = new double[num_factors];

            var m = new MathNet.Numerics.LinearAlgebra.Matrix(num_factors, num_factors);

            MathNet.Numerics.LinearAlgebra.Matrix m_inv;
            // TODO speed up using more parts of that library

            // TODO using properties gives a 3-5% performance penalty

            // source code comments are in terms of computing the user factors
            // works the same with users and items exchanged

            // (1) create HH in O(f^2|Items|)
            // HH is symmetric: compute the upper triangle only and mirror it
            // (the original recomputed both halves, doubling the dominant cost)
            for (int f_1 = 0; f_1 < num_factors; f_1++)
            {
                for (int f_2 = f_1; f_2 < num_factors; f_2++)
                {
                    double d = 0;
                    for (int i = 0; i < H.dim1; i++)
                    {
                        d += H[i, f_1] * H[i, f_2];
                    }
                    HH[f_1, f_2] = d;
                    HH[f_2, f_1] = d;
                }
            }
            // (2) optimize all U
            for (int u = 0; u < W.dim1; u++)
            {
                var row = data.GetEntriesByRow(u);

                // prepare KDD Cup specific weighting
                int num_user_items           = row.Count;
                int user_positive_weight_sum = 0;
                foreach (int i in row)
                {
                    user_positive_weight_sum += inverse_data.NumEntriesByRow(i);
                }
                double neg_weight_normalization = (double)(num_user_items * (1 + CPos)) / (Feedback.Count - user_positive_weight_sum);
                // TODO precompute
                // TODO check whether this is correct

                // create HC_minus_IH in O(f^2|S_u|); it is symmetric as well,
                // so again only the upper triangle is computed
                for (int f_1 = 0; f_1 < num_factors; f_1++)
                {
                    for (int f_2 = f_1; f_2 < num_factors; f_2++)
                    {
                        double d = 0;
                        foreach (int i in row)
                        {
                            //d += H[i, f_1] * H[i, f_2] * (c_pos - 1);
                            d += H[i, f_1] * H[i, f_2] * CPos;
                        }
                        HC_minus_IH[f_1, f_2] = d;
                        HC_minus_IH[f_2, f_1] = d;
                    }
                }
                // create HCp in O(f|S_u|)
                for (int f = 0; f < num_factors; f++)
                {
                    double d = 0;
                    for (int i = 0; i < inverse_data.NumberOfRows; i++)
                    {
                        if (row.Contains(i))
                        {
                            d += H[i, f] * (1 + CPos);
                        }
                        else
                        {
                            d += H[i, f] * inverse_data.NumEntriesByRow(i) * neg_weight_normalization;
                        }
                    }
                    HCp[f] = d;
                }
                // create m = HH + HC_minus_IH + reg*I
                // m is symmetric; the inverse m_inv is symmetric as well
                for (int f_1 = 0; f_1 < num_factors; f_1++)
                {
                    for (int f_2 = f_1; f_2 < num_factors; f_2++)
                    {
                        double d = HH[f_1, f_2] + HC_minus_IH[f_1, f_2];
                        if (f_1 == f_2)
                        {
                            d += Regularization;
                        }
                        m[f_1, f_2] = d;
                        m[f_2, f_1] = d;
                    }
                }
                m_inv = m.Inverse();
                // write back optimal W
                for (int f = 0; f < num_factors; f++)
                {
                    double d = 0;
                    for (int f_2 = 0; f_2 < num_factors; f_2++)
                    {
                        d += m_inv[f, f_2] * HCp[f_2];
                    }
                    W[u, f] = d;
                }
            }
        }
// Example #8 -- file: WRMF.cs, project: bemde/MyMediaLite
        /// <summary>Optimizes the specified data</summary>
        /// <param name="data">data</param>
        /// <param name="W">W, the factor matrix optimized row by row</param>
        /// <param name="H">H, the fixed factor matrix</param>
        protected virtual void Optimize(IBooleanMatrix data, Matrix<float> W, Matrix<float> H)
        {
            var HH          = new Matrix<double>(num_factors, num_factors);
            var HC_minus_IH = new Matrix<double>(num_factors, num_factors);
            var HCp         = new double[num_factors];

            var m = new DenseMatrix(num_factors, num_factors);

            // source code comments are in terms of computing the user factors
            // works the same with users and items exchanged

            // (1) create HH in O(f^2|Items|)
            // HH is symmetric: compute the upper triangle only and mirror it
            // (the original recomputed both halves, doubling the dominant cost)
            for (int f_1 = 0; f_1 < num_factors; f_1++)
            {
                for (int f_2 = f_1; f_2 < num_factors; f_2++)
                {
                    double d = 0;
                    for (int i = 0; i < H.dim1; i++)
                        d += H[i, f_1] * H[i, f_2];
                    HH[f_1, f_2] = d;
                    HH[f_2, f_1] = d;
                }
            }
            // (2) optimize all U
            for (int u = 0; u < W.dim1; u++)
            {
                var row = data.GetEntriesByRow(u);
                // create HC_minus_IH in O(f^2|S_u|); it is symmetric as well
                for (int f_1 = 0; f_1 < num_factors; f_1++)
                {
                    for (int f_2 = f_1; f_2 < num_factors; f_2++)
                    {
                        double d = 0;
                        foreach (int i in row)
                            //d += H[i, f_1] * H[i, f_2] * (c_pos - 1);
                            d += H[i, f_1] * H[i, f_2] * c_pos;
                        HC_minus_IH[f_1, f_2] = d;
                        HC_minus_IH[f_2, f_1] = d;
                    }
                }
                // create HCp in O(f|S_u|)
                for (int f = 0; f < num_factors; f++)
                {
                    double d = 0;
                    foreach (int i in row)
                        //d += H[i, f] * c_pos;
                        d += H[i, f] * (1 + c_pos);
                    HCp[f] = d;
                }
                // create m = HH + HC_minus_IH + reg*I
                // m is symmetric; the inverse m_inv is symmetric as well
                for (int f_1 = 0; f_1 < num_factors; f_1++)
                {
                    for (int f_2 = f_1; f_2 < num_factors; f_2++)
                    {
                        double d = HH[f_1, f_2] + HC_minus_IH[f_1, f_2];
                        if (f_1 == f_2)
                            d += regularization;
                        m[f_1, f_2] = d;
                        m[f_2, f_1] = d;
                    }
                }
                var m_inv = m.Inverse();
                // write back optimal W
                for (int f = 0; f < num_factors; f++)
                {
                    double d = 0;
                    for (int f_2 = 0; f_2 < num_factors; f_2++)
                        d += m_inv[f, f_2] * HCp[f_2];
                    W[u, f] = (float) d;
                }
            }
        }
        /// <summary>
        /// Sample a pair of items for the given user; on return, item_id is the item
        /// to be preferred over other_item_id.
        /// </summary>
        /// <param name="user_id">the ID of the user to sample for</param>
        /// <param name="item_id">the preferred item (output)</param>
        /// <param name="other_item_id">the less preferred item (output)</param>
        protected void SampleAnyItemPair(int user_id, out int item_id, out int other_item_id)
        {
            var user_items = Feedback.UserMatrix [user_id];

            // NOTE(review): this rejection-sampling loop has no iteration bound; if no
            // acceptable pair exists for the user it will not terminate -- confirm this
            // is acceptable for the intended data
            while (true)
            {
                item_id       = random.Next(MaxItemID + 1);
                other_item_id = random.Next(MaxItemID + 1);

                // case 1: both items seen by the user, or both unseen
                // -> break the tie via the user's item-attribute weights
                if ((user_items.Contains(item_id) && user_items.Contains(other_item_id)) ||
                    (!user_items.Contains(item_id) && !user_items.Contains(other_item_id)))
                {
                    // both items must have an attribute row
                    if (item_id >= item_attributes.NumberOfRows || other_item_id >= item_attributes.NumberOfRows)
                    {
                        continue;
                    }

                    // average per-user weight over the first item's attributes
                    var attrList = item_attributes.GetEntriesByRow(item_id);
                    if (attrList.Count == 0)
                    {
                        continue;
                    }
                    float sum1 = 0;
                    foreach (int g in attrList)
                    {
                        sum1 += item_attribute_weight_by_user[user_id, g];                        //weights [user_id, g];
                    }
                    sum1 /= attrList.Count;

                    // average per-user weight over the second item's attributes
                    attrList = item_attributes.GetEntriesByRow(other_item_id);
                    if (attrList.Count == 0)
                    {
                        continue;
                    }
                    float sum2 = 0;
                    foreach (int g in attrList)
                    {
                        sum2 += item_attribute_weight_by_user[user_id, g];                        //weights [user_id, g];
                    }
                    sum2 /= attrList.Count;

                    //Console.WriteLine (Math.Abs(sum1-sum2));
                    // reject pairs whose weight averages are too close to call
                    // NOTE(review): 3.5 is a magic threshold -- confirm where this value
                    // comes from and whether it should be configurable
                    if (Math.Abs(sum1 - sum2) < 3.5)
                    {
                        continue;
                    }

                    // ensure item_id is the item with the larger average weight
                    if (sum1 < sum2)
                    {
                        int aux = other_item_id;
                        other_item_id = item_id;
                        item_id       = aux;
                    }
                    return;
                }
                else
                {
                    // case 2: exactly one item seen -> the seen item is the preferred one
                    if (!user_items.Contains(item_id))
                    {
                        int aux = other_item_id;
                        other_item_id = item_id;
                        item_id       = aux;
                    }
                    return;
                }
            }
        }
// Example #10
        /// <summary>
        /// Optimizes one side of the weighted factorization: for every row u of W,
        /// builds the system matrix m = H^T H + (confidence-weighted part) + reg*I and
        /// writes back W[u] = m^-1 * HCp while H is held fixed (one ALS half-step).
        /// </summary>
        /// <param name="data">the implicit feedback matrix; row u lists the positively observed columns for u (users and items may be exchanged, see comments below)</param>
        /// <param name="W">the factor matrix to be updated in place, one row per row of data</param>
        /// <param name="H">the fixed factor matrix of the other side</param>
        protected virtual void Optimize(IBooleanMatrix data, Matrix <double> W, Matrix <double> H)
        {
            var HH          = new Matrix <double>(num_factors, num_factors);
            var HC_minus_IH = new Matrix <double>(num_factors, num_factors);
            var HCp         = new double[num_factors];

            // system matrix and its inverse are reused across all rows of W
            var m = new MathNet.Numerics.LinearAlgebra.Matrix(num_factors, num_factors);

            MathNet.Numerics.LinearAlgebra.Matrix m_inv;
            // TODO speed up using more parts of that library

            // source code comments are in terms of computing the user factors
            // works the same with users and items exchanged

            // (1) create HH in O(f^2|Items|)
            // HH is symmetric
            for (int f_1 = 0; f_1 < num_factors; f_1++)
            {
                for (int f_2 = 0; f_2 < num_factors; f_2++)
                {
                    double d = 0;
                    for (int i = 0; i < H.dim1; i++)
                    {
                        d += H[i, f_1] * H[i, f_2];
                    }
                    HH[f_1, f_2] = d;
                }
            }
            // (2) optimize all U
            // HC_minus_IH is symmetric
            for (int u = 0; u < W.dim1; u++)
            {
                var row = data.GetEntriesByRow(u);
                // create HC_minus_IH in O(f^2|S_u|)
                // only the user's observed items contribute, each weighted by c_pos
                for (int f_1 = 0; f_1 < num_factors; f_1++)
                {
                    for (int f_2 = 0; f_2 < num_factors; f_2++)
                    {
                        double d = 0;
                        foreach (int i in row)
                        {
                            //d += H[i, f_1] * H[i, f_2] * (c_pos - 1);
                            d += H[i, f_1] * H[i, f_2] * c_pos;
                        }
                        HC_minus_IH[f_1, f_2] = d;
                    }
                }
                // create HCp in O(f|S_u|)
                // observed entries carry confidence weight (1 + c_pos)
                for (int f = 0; f < num_factors; f++)
                {
                    double d = 0;
                    foreach (int i in row)
                    {
                        //d += H[i, f] * c_pos;
                        d += H[i, f] * (1 + c_pos);
                    }
                    HCp[f] = d;
                }
                // create m = HH + HC_minus_IH + reg*I
                // m is symmetric
                // the inverse m_inv is symmetric
                for (int f_1 = 0; f_1 < num_factors; f_1++)
                {
                    for (int f_2 = 0; f_2 < num_factors; f_2++)
                    {
                        double d = HH[f_1, f_2] + HC_minus_IH[f_1, f_2];
                        if (f_1 == f_2)
                        {
                            d += regularization;
                        }
                        m[f_1, f_2] = d;
                    }
                }
                m_inv = m.Inverse();
                // write back optimal W
                // W[u] = m_inv * HCp (solved via explicit inverse; see TODO above)
                for (int f = 0; f < num_factors; f++)
                {
                    double d = 0;
                    for (int f_2 = 0; f_2 < num_factors; f_2++)
                    {
                        d += m_inv[f, f_2] * HCp[f_2];
                    }
                    W[u, f] = d;
                }
            }
        }
예제 #11
0
        /// <summary>
        /// Optimizes one side of the weighted factorization (float variant, parallel over rows):
        /// for every row u of W, builds m = H^T H + (alpha-weighted part) + reg*I and
        /// writes back W[u] = m^-1 * HCp while H is held fixed.
        /// </summary>
        /// <param name="data">the implicit feedback matrix; row u lists the positively observed columns for u</param>
        /// <param name="W">the factor matrix to be updated in place, one row per row of data</param>
        /// <param name="H">the fixed factor matrix of the other side</param>
        protected virtual void Optimize(IBooleanMatrix data, Matrix <float> W, Matrix <float> H)
        {
            var HH = new Matrix <double>(num_factors, num_factors);

            // comments are in terms of computing the user factors
            // ... works the same with users and items exchanged

            // (1) create HH in O(f^2|Items|)
            // HH is symmetric
            for (int f_1 = 0; f_1 < num_factors; f_1++)
            {
                for (int f_2 = 0; f_2 < num_factors; f_2++)
                {
                    double d = 0;
                    for (int i = 0; i < H.dim1; i++)
                    {
                        d += H[i, f_1] * H[i, f_2];
                    }
                    HH[f_1, f_2] = d;
                }
            }
            // (2) optimize all U
            // HC_minus_IH is symmetric
            // parallel over rows of W: each iteration reads the shared HH/H, allocates its own
            // temporaries, and writes only its own row of W, so no locking is required
            // (assumes data.GetEntriesByRow is safe for concurrent readers — TODO confirm)
            Parallel.For(0, W.dim1, u =>
            {
                var row = data.GetEntriesByRow(u);
                // create HC_minus_IH in O(f^2|S_u|)
                // only the user's observed items contribute, each weighted by alpha
                var HC_minus_IH = new Matrix <double>(num_factors, num_factors);
                for (int f_1 = 0; f_1 < num_factors; f_1++)
                {
                    for (int f_2 = 0; f_2 < num_factors; f_2++)
                    {
                        double d = 0;
                        foreach (int i in row)
                        {
                            d += H[i, f_1] * H[i, f_2] * alpha;
                        }
                        HC_minus_IH[f_1, f_2] = d;
                    }
                }
                // create HCp in O(f|S_u|)
                // observed entries carry confidence weight (1 + alpha)
                var HCp = new double[num_factors];
                for (int f = 0; f < num_factors; f++)
                {
                    double d = 0;
                    foreach (int i in row)
                    {
                        d += H[i, f] * (1 + alpha);
                    }
                    HCp[f] = d;
                }
                // create m = HH + HC_minus_IH + reg*I
                // m is symmetric
                // the inverse m_inv is symmetric
                var m = new DenseMatrix(num_factors, num_factors);
                for (int f_1 = 0; f_1 < num_factors; f_1++)
                {
                    for (int f_2 = 0; f_2 < num_factors; f_2++)
                    {
                        double d = HH[f_1, f_2] + HC_minus_IH[f_1, f_2];
                        if (f_1 == f_2)
                        {
                            d += regularization;
                        }
                        m[f_1, f_2] = d;
                    }
                }
                var m_inv = m.Inverse();
                // write back optimal W
                // W[u] = m_inv * HCp (solved via explicit inverse)
                for (int f = 0; f < num_factors; f++)
                {
                    double d = 0;
                    for (int f_2 = 0; f_2 < num_factors; f_2++)
                    {
                        d += m_inv[f, f_2] * HCp[f_2];
                    }
                    W[u, f] = (float)d;
                }
            });
        }
예제 #12
0
        /// <summary>
        /// Performs one pass of stochastic gradient descent over the given rating examples,
        /// updating biases, latent factors, implicit-feedback factors (y), and
        /// item-attribute factors (x) in place.
        /// </summary>
        /// <param name="rating_indices">indices of the rating examples to iterate over</param>
        /// <param name="update_user">if true, update the user-side parameters</param>
        /// <param name="update_item">if true, update the item-side parameters (incl. y and x)</param>
        protected override void Iterate(IList <int> rating_indices, bool update_user, bool update_item)
        {
            user_factors = null;             // delete old user factors
            item_factors = null;             // delete old item factors
            float reg = Regularization;      // to limit property accesses

            foreach (int index in rating_indices)
            {
                int u = ratings.Users[index];
                int i = ratings.Items[index];

                // prediction = global bias + biases + (q_i + x attribute sum) . (p_u + normalized y sum)
                double prediction          = global_bias + user_bias[u] + item_bias[i];
                var    p_plus_y_sum_vector = y.SumOfRows(items_rated_by_user[u]);
                double norm_denominator    = Math.Sqrt(items_rated_by_user[u].Length);
                for (int f = 0; f < p_plus_y_sum_vector.Count; f++)
                {
                    p_plus_y_sum_vector[f] = (float)(p_plus_y_sum_vector[f] / norm_denominator + p[u, f]);
                }

                var q_plus_x_sum_vector = q.GetRow(i);

                if (i < item_attributes.NumberOfRows)
                {
                    IList <int> attribute_list = item_attributes.GetEntriesByRow(i);
                    // guard against an empty attribute row: dividing by Count == 0 would
                    // produce NaN here and poison every parameter updated below
                    if (attribute_list.Count > 0)
                    {
                        double second_norm_denominator = attribute_list.Count;
                        var    x_sum_vector            = x.SumOfRows(attribute_list);
                        for (int f = 0; f < x_sum_vector.Count; f++)
                        {
                            q_plus_x_sum_vector[f] += (float)(x_sum_vector[f] / second_norm_denominator);
                        }
                    }
                }

                prediction += DataType.VectorExtensions.ScalarProduct(q_plus_x_sum_vector, p_plus_y_sum_vector);

                double err = ratings[index] - prediction;

                // per-entity regularization, optionally scaled by 1/sqrt(frequency)
                float user_reg_weight = FrequencyRegularization ? (float)(reg / Math.Sqrt(ratings.CountByUser[u])) : reg;
                float item_reg_weight = FrequencyRegularization ? (float)(reg / Math.Sqrt(ratings.CountByItem[i])) : reg;

                // adjust biases
                if (update_user)
                {
                    user_bias[u] += BiasLearnRate * current_learnrate * ((float)err - BiasReg * user_reg_weight * user_bias[u]);
                }
                if (update_item)
                {
                    item_bias[i] += BiasLearnRate * current_learnrate * ((float)err - BiasReg * item_reg_weight * item_bias[i]);
                }

                // adjust latent factors
                double normalized_error = err / norm_denominator;
                for (int f = 0; f < NumFactors; f++)
                {
                    float i_f = q_plus_x_sum_vector[f];

                    // if necessary, compute and apply updates
                    if (update_user)
                    {
                        double delta_u = err * i_f - user_reg_weight * p[u, f];
                        p.Inc(u, f, current_learnrate * delta_u);
                    }
                    if (update_item)
                    {
                        double common_update = normalized_error * i_f;
                        foreach (int other_item_id in items_rated_by_user[u])
                        {
                            double delta_oi = common_update - y_reg[other_item_id] * y[other_item_id, f];
                            y.Inc(other_item_id, f, current_learnrate * delta_oi);
                        }

                        double delta_i = err * p_plus_y_sum_vector[f] - item_reg_weight * q[i, f];
                        q.Inc(i, f, current_learnrate * delta_i);

                        // adjust attributes
                        if (i < item_attributes.NumberOfRows)
                        {
                            IList <int> attribute_list = item_attributes.GetEntriesByRow(i);
                            // same guard as in the prediction step: avoid err / 0 for empty rows
                            if (attribute_list.Count > 0)
                            {
                                double second_norm_denominator = attribute_list.Count;
                                double second_norm_error       = err / second_norm_denominator;

                                foreach (int attribute_id in attribute_list)
                                {
                                    double delta_oi = second_norm_error * p_plus_y_sum_vector[f] - x_reg[attribute_id] * x[attribute_id, f];
                                    x.Inc(attribute_id, f, current_learnrate * delta_oi);
                                }
                            }
                        }
                    }
                }
            }

            UpdateLearnRate();
        }
예제 #13
0
		/// <summary>Computes the optimal factor row W[u] for a single user, holding H fixed</summary>
		/// <param name="u">the row of W to optimize</param>
		/// <param name="data">implicit feedback; row u lists the user's positive items</param>
		/// <param name="W">the factor matrix being optimized</param>
		/// <param name="H">the fixed factor matrix of the other side</param>
		/// <param name="HH">the precomputed product H^T H (shared across users)</param>
		private void Optimize(int u, IBooleanMatrix data, Matrix<float> W, Matrix<float> H, Matrix<double> HH)
		{
			var user_row = data.GetEntriesByRow(u);

			// contribution of the user's positive items, weighted by alpha;
			// symmetric, so only the upper triangle is computed and then mirrored
			var HC_minus_IH = new Matrix<double>(num_factors, num_factors);
			for (int r = 0; r < num_factors; r++)
			{
				for (int c = r; c < num_factors; c++)
				{
					double acc = 0;
					foreach (int item in user_row)
					{
						acc += H[item, r] * H[item, c];
					}
					HC_minus_IH[r, c] = acc * alpha;
					HC_minus_IH[c, r] = acc * alpha;
				}
			}

			// right-hand side: positive items carry confidence weight (1 + alpha)
			var HCp = new double[num_factors];
			for (int f = 0; f < num_factors; f++)
			{
				double acc = 0;
				foreach (int item in user_row)
				{
					acc += H[item, f];
				}
				HCp[f] = acc * (1 + alpha);
			}

			// system matrix m = HH + HC_minus_IH + regularization * I
			// (symmetric, as is its inverse)
			var m = new DenseMatrix(num_factors, num_factors);
			for (int r = 0; r < num_factors; r++)
			{
				for (int c = r; c < num_factors; c++)
				{
					double entry = HH[r, c] + HC_minus_IH[r, c];
					if (r == c)
					{
						entry += regularization;
					}
					m[r, c] = entry;
					m[c, r] = entry;
				}
			}
			var m_inv = m.Inverse();

			// optimal row: W[u] = m_inv * HCp
			for (int f = 0; f < num_factors; f++)
			{
				double entry = 0;
				for (int c = 0; c < num_factors; c++)
				{
					entry += m_inv[f, c] * HCp[c];
				}
				W[u, f] = (float)entry;
			}
		}
예제 #14
0
        /// <summary>
        /// Sample a pair of items, given a user
        /// </summary>
        /// <param name='user_id'>
        /// the user ID
        /// </param>
        /// <param name='item_id'>
        /// the ID of the first item (a positive item of the user)
        /// </param>
        /// <param name='other_item_id'>
        /// the ID of the second item
        /// </param>
        protected override void SampleItemPair(int user_id, out int item_id, out int other_item_id)
        {
            var user_items = Feedback.UserMatrix [user_id];

            // draw the positive item uniformly from the user's items
            item_id = user_items.ElementAt(random.Next(user_items.Count));

            if (item_id >= item_attributes.NumberOfRows)
            {
                // no attribute row for the positive item:
                // fall back to rejection-sampling any item the user has not seen
                do
                {
                    other_item_id = random.Next(MaxItemID + 1);
                }while (user_items.Contains(other_item_id));
                return;
            }

            // mean attribute weight of the positive item for this user
            var   positive_attributes = item_attributes.GetEntriesByRow(item_id);
            float positive_weight     = 0;
            foreach (int attribute_id in positive_attributes)
            {
                positive_weight += item_attribute_weight_by_user[user_id, attribute_id];
            }
            if (positive_attributes.Count > 0)
            {
                positive_weight /= positive_attributes.Count;
            }

            for ( ; ; )
            {
                other_item_id = random.Next(MaxItemID + 1);

                // an unseen item is accepted immediately
                if (!user_items.Contains(other_item_id))
                {
                    return;
                }

                // a seen item without an attribute row cannot be compared: resample
                if (other_item_id >= item_attributes.NumberOfRows)
                {
                    continue;
                }

                // mean attribute weight of the candidate for this user
                var   candidate_attributes = item_attributes.GetEntriesByRow(other_item_id);
                float candidate_weight     = 0;
                foreach (int attribute_id in candidate_attributes)
                {
                    candidate_weight += item_attribute_weight_by_user[user_id, attribute_id];
                }
                if (candidate_attributes.Count > 0)
                {
                    candidate_weight /= candidate_attributes.Count;
                }

                // require a clear difference in attribute weight between the two seen items
                if (Math.Abs(positive_weight - candidate_weight) < 2.5)
                {
                    continue;
                }

                // order the pair so the higher-weighted item comes first
                if (positive_weight < candidate_weight)
                {
                    int swapped = item_id;
                    item_id       = other_item_id;
                    other_item_id = swapped;
                }
                return;
            }
        }
예제 #15
0
        /// <summary>
        /// Optimizes one side of the factorization with KDD-Cup-specific negative weighting:
        /// for every row u of W, builds m = H^T H + (CPos-weighted part) + Regularization*I
        /// and writes back W[u] = m^-1 * HCp while H is held fixed.
        /// </summary>
        /// <param name="data">implicit feedback; row u lists the positively observed columns for u</param>
        /// <param name="inverse_data">the inverse (transposed) feedback relation, used to weight the unobserved entries</param>
        /// <param name="W">the factor matrix to be updated in place, one row per row of data</param>
        /// <param name="H">the fixed factor matrix of the other side</param>
        void Optimize(IBooleanMatrix data, IBooleanMatrix inverse_data, Matrix<double> W, Matrix<double> H)
        {
            var HH          = new Matrix<double>(num_factors, num_factors);
            var HC_minus_IH = new Matrix<double>(num_factors, num_factors);
            var HCp         = new double[num_factors];

            var m = new MathNet.Numerics.LinearAlgebra.Matrix(num_factors, num_factors);
            MathNet.Numerics.LinearAlgebra.Matrix m_inv;
            // TODO speed up using more parts of that library

            // TODO using properties gives a 3-5% performance penalty

            // source code comments are in terms of computing the user factors
            // works the same with users and items exchanged

            // (1) create HH in O(f^2|Items|)
            // HH is symmetric
            for (int f_1 = 0; f_1 < num_factors; f_1++)
                for (int f_2 = 0; f_2 < num_factors; f_2++)
                {
                    double d = 0;
                    for (int i = 0; i < H.dim1; i++)
                        d += H[i, f_1] * H[i, f_2];
                    HH[f_1, f_2] = d;
                }
            // (2) optimize all U
            // HC_minus_IH is symmetric
            for (int u = 0; u < W.dim1; u++)
            {
                var row = data.GetEntriesByRow(u);
                // hash set for O(1) membership tests in the HCp loop below;
                // IList<int>.Contains is O(|row|) and was executed num_factors * |Items| times
                var row_set = new HashSet<int>(row);

                // prepare KDD Cup specific weighting
                int num_user_items = row.Count;
                int user_positive_weight_sum = 0;
                foreach (int i in row)
                    user_positive_weight_sum += inverse_data.NumEntriesByRow(i);
                double neg_weight_normalization = (double) (num_user_items * (1 + CPos)) / (Feedback.Count - user_positive_weight_sum);
                // TODO precompute
                // TODO check whether this is correct

                // create HC_minus_IH in O(f^2|S_u|)
                for (int f_1 = 0; f_1 < num_factors; f_1++)
                    for (int f_2 = 0; f_2 < num_factors; f_2++)
                    {
                        double d = 0;
                        foreach (int i in row)
                            d += H[i, f_1] * H[i, f_2] * CPos;
                        HC_minus_IH[f_1, f_2] = d;
                    }
                // create HCp in O(f|Items|)
                // observed entries get confidence (1 + CPos); unobserved entries are weighted
                // by their popularity in inverse_data, normalized by neg_weight_normalization
                for (int f = 0; f < num_factors; f++)
                {
                    double d = 0;
                    for (int i = 0; i < inverse_data.NumberOfRows; i++)
                        if (row_set.Contains(i))
                            d += H[i, f] * (1 + CPos);
                        else
                            d += H[i, f] * inverse_data.NumEntriesByRow(i) * neg_weight_normalization;
                    HCp[f] = d;
                }
                // create m = HH + HC_minus_IH + reg*I
                // m is symmetric
                // the inverse m_inv is symmetric
                for (int f_1 = 0; f_1 < num_factors; f_1++)
                    for (int f_2 = 0; f_2 < num_factors; f_2++)
                    {
                        double d = HH[f_1, f_2] + HC_minus_IH[f_1, f_2];
                        if (f_1 == f_2)
                            d += Regularization;
                        m[f_1, f_2] = d;
                    }
                m_inv = m.Inverse();
                // write back optimal W
                // W[u] = m_inv * HCp (solved via explicit inverse; see TODO above)
                for (int f = 0; f < num_factors; f++)
                {
                    double d = 0;
                    for (int f_2 = 0; f_2 < num_factors; f_2++)
                        d += m_inv[f, f_2] * HCp[f_2];
                    W[u, f] = d;
                }
            }
        }
        /// <summary>
        /// Performs one pass of stochastic gradient descent over the given rating examples,
        /// updating biases, user/item attribute weights, demographic-specific weights (h),
        /// and latent factors in place.
        /// </summary>
        /// <param name="rating_indices">indices of the rating examples to iterate over</param>
        /// <param name="update_user">if true, update the user-side parameters</param>
        /// <param name="update_item">if true, update the item-side parameters</param>
        protected override void Iterate(IList <int> rating_indices, bool update_user, bool update_item)
        {
            user_factors = null;             // delete old user factors
            item_factors = null;             // delete old item factors
            float reg = Regularization;      // to limit property accesses

            foreach (int index in rating_indices)
            {
                int u = ratings.Users[index];
                int i = ratings.Items[index];

                // ---- compute the prediction ----
                double prediction          = global_bias + user_bias[u] + item_bias[i];
                var    p_plus_y_sum_vector = y.SumOfRows(items_rated_by_user[u]);
                double norm_denominator    = Math.Sqrt(items_rated_by_user[u].Length);
                for (int f = 0; f < p_plus_y_sum_vector.Count; f++)
                {
                    p_plus_y_sum_vector[f] = (float)(p_plus_y_sum_vector[f] / norm_denominator + p[u, f]);
                }

                // mean bias of the user's main demographic attributes
                if (u < user_attributes.NumberOfRows)
                {
                    IList <int> attribute_list = user_attributes.GetEntriesByRow(u);
                    if (attribute_list.Count > 0)
                    {
                        double sum = 0;
                        double second_norm_denominator = attribute_list.Count;
                        foreach (int attribute_id in attribute_list)
                        {
                            sum += main_demo[attribute_id];
                        }
                        prediction += sum / second_norm_denominator;
                    }
                }

                // mean bias of each additional user attribute relation
                for (int d = 0; d < additional_user_attributes.Count; d++)
                {
                    if (u < additional_user_attributes[d].NumberOfRows)
                    {
                        IList <int> attribute_list = additional_user_attributes[d].GetEntriesByRow(u);
                        if (attribute_list.Count > 0)
                        {
                            double sum = 0;
                            double second_norm_denominator = attribute_list.Count;
                            foreach (int attribute_id in attribute_list)
                            {
                                sum += second_demo[d][attribute_id];
                            }
                            prediction += sum / second_norm_denominator;
                        }
                    }
                }

                // mean bias of the item's main metadata attributes
                if (i < ItemAttributes.NumberOfRows)
                {
                    IList <int> attribute_list = ItemAttributes.GetEntriesByRow(i);
                    if (attribute_list.Count > 0)
                    {
                        double sum = 0;
                        double second_norm_denominator = attribute_list.Count;
                        foreach (int attribute_id in attribute_list)
                        {
                            sum += main_metadata[attribute_id];
                        }
                        prediction += sum / second_norm_denominator;
                    }
                }

                // mean bias of each additional item attribute relation
                for (int g = 0; g < AdditionalItemAttributes.Count; g++)
                {
                    if (i < AdditionalItemAttributes[g].NumberOfRows)
                    {
                        IList <int> attribute_list = AdditionalItemAttributes[g].GetEntriesByRow(i);
                        if (attribute_list.Count > 0)
                        {
                            double sum = 0;
                            double second_norm_denominator = attribute_list.Count;
                            foreach (int attribute_id in attribute_list)
                            {
                                sum += second_metadata[g][attribute_id];
                            }
                            prediction += sum / second_norm_denominator;
                        }
                    }
                }

                // demographic-specific user-attribute x item-attribute interaction weights h
                if (u < UserAttributes.NumberOfRows && i < ItemAttributes.NumberOfRows)
                {
                    IList <int> item_attribute_list   = ItemAttributes.GetEntriesByRow(i);
                    double      item_norm_denominator = item_attribute_list.Count;

                    IList <int> user_attribute_list   = UserAttributes.GetEntriesByRow(u);
                    float       user_norm_denominator = user_attribute_list.Count;

                    // NOTE(review): an empty attribute list here makes a denominator 0 and
                    // the term NaN (0f/0f) — presumably non-empty rows are guaranteed; verify
                    float demo_spec = 0;
                    float sum       = 0;
                    foreach (int u_att in user_attribute_list)
                    {
                        foreach (int i_att in item_attribute_list)
                        {
                            sum += h[0][u_att, i_att];
                        }
                    }
                    demo_spec += sum / user_norm_denominator;

                    for (int d = 0; d < AdditionalUserAttributes.Count; d++)
                    {
                        user_attribute_list   = AdditionalUserAttributes[d].GetEntriesByRow(u);
                        user_norm_denominator = user_attribute_list.Count;
                        sum = 0;
                        foreach (int u_att in user_attribute_list)
                        {
                            foreach (int i_att in item_attribute_list)
                            {
                                sum += h[d + 1][u_att, i_att];
                            }
                        }
                        demo_spec += sum / user_norm_denominator;
                    }

                    prediction += demo_spec / item_norm_denominator;
                }


                var q_plus_x_sum_vector = q.GetRow(i);
                if (i < item_attributes.NumberOfRows)
                {
                    IList <int> attribute_list = item_attributes.GetEntriesByRow(i);
                    if (attribute_list.Count > 0)
                    {
                        double second_norm_denominator = attribute_list.Count;
                        var    x_sum_vector            = x.SumOfRows(attribute_list);
                        for (int f = 0; f < x_sum_vector.Count; f++)
                        {
                            q_plus_x_sum_vector[f] += (float)(x_sum_vector[f] / second_norm_denominator);
                        }
                    }
                }

                prediction += DataType.VectorExtensions.ScalarProduct(q_plus_x_sum_vector, p_plus_y_sum_vector);

                double err = ratings[index] - prediction;

                // per-entity regularization, optionally scaled by 1/sqrt(frequency)
                float user_reg_weight = FrequencyRegularization ? (float)(reg / Math.Sqrt(ratings.CountByUser[u])) : reg;
                float item_reg_weight = FrequencyRegularization ? (float)(reg / Math.Sqrt(ratings.CountByItem[i])) : reg;

                // adjust biases
                if (update_user)
                {
                    user_bias[u] += BiasLearnRate * current_learnrate * ((float)err - BiasReg * user_reg_weight * user_bias[u]);
                }
                if (update_item)
                {
                    item_bias[i] += BiasLearnRate * current_learnrate * ((float)err - BiasReg * item_reg_weight * item_bias[i]);
                }

                // adjust user attributes
                if (u < user_attributes.NumberOfRows)
                {
                    IList <int> attribute_list = user_attributes.GetEntriesByRow(u);
                    if (attribute_list.Count > 0)
                    {
                        double second_norm_denominator = attribute_list.Count;
                        double second_norm_error       = err / second_norm_denominator;

                        foreach (int attribute_id in attribute_list)
                        {
                            main_demo[attribute_id] += BiasLearnRate * current_learnrate * ((float)second_norm_error - BiasReg * reg * main_demo[attribute_id]);
                        }
                    }
                }

                for (int d = 0; d < additional_user_attributes.Count; d++)
                {
                    if (u < additional_user_attributes[d].NumberOfRows)
                    {
                        IList <int> attribute_list = additional_user_attributes[d].GetEntriesByRow(u);
                        if (attribute_list.Count > 0)
                        {
                            double second_norm_denominator = attribute_list.Count;
                            double second_norm_error       = err / second_norm_denominator;

                            foreach (int attribute_id in attribute_list)
                            {
                                second_demo[d][attribute_id] += BiasLearnRate * current_learnrate * ((float)second_norm_error - BiasReg * reg * second_demo[d][attribute_id]);
                            }
                        }
                    }
                }

                // adjust item attributes
                if (i < ItemAttributes.NumberOfRows)
                {
                    IList <int> attribute_list = ItemAttributes.GetEntriesByRow(i);
                    if (attribute_list.Count > 0)
                    {
                        foreach (int attribute_id in attribute_list)
                        {
                            main_metadata[attribute_id] += BiasLearnRate * current_learnrate * ((float)err - BiasReg * Regularization * main_metadata[attribute_id]);
                        }
                    }
                }

                for (int g = 0; g < AdditionalItemAttributes.Count; g++)
                {
                    if (i < AdditionalItemAttributes[g].NumberOfRows)
                    {
                        IList <int> attribute_list = AdditionalItemAttributes[g].GetEntriesByRow(i);
                        if (attribute_list.Count > 0)
                        {
                            foreach (int attribute_id in attribute_list)
                            {
                                second_metadata[g][attribute_id] += BiasLearnRate * current_learnrate * ((float)err - BiasReg * Regularization * second_metadata[g][attribute_id]);
                            }
                        }
                    }
                }

                // adjust demo specific attributes
                if (u < UserAttributes.NumberOfRows && i < ItemAttributes.NumberOfRows)
                {
                    IList <int> item_attribute_list   = ItemAttributes.GetEntriesByRow(i);
                    float       item_norm_denominator = item_attribute_list.Count;

                    IList <int> user_attribute_list = UserAttributes.GetEntriesByRow(u);
                    // bug fix: "1 / Count" was integer division, which evaluates to 0 for any
                    // Count > 1 and silently disabled these updates (the prediction step above
                    // correctly uses float division for the same normalization)
                    float       user_norm           = 1.0f / user_attribute_list.Count;

                    float norm_error = (float)err / item_norm_denominator;

                    foreach (int u_att in user_attribute_list)
                    {
                        foreach (int i_att in item_attribute_list)
                        {
                            h[0][u_att, i_att] += BiasLearnRate * current_learnrate * (norm_error * user_norm - BiasReg * reg * h[0][u_att, i_att]);
                        }
                    }

                    for (int d = 0; d < AdditionalUserAttributes.Count; d++)
                    {
                        user_attribute_list = AdditionalUserAttributes[d].GetEntriesByRow(u);
                        user_norm           = 1.0f / user_attribute_list.Count;   // bug fix: was integer division, see above

                        foreach (int u_att in user_attribute_list)
                        {
                            foreach (int i_att in item_attribute_list)
                            {
                                h[d + 1][u_att, i_att] += BiasLearnRate * current_learnrate * (norm_error * user_norm - BiasReg * reg * h[d + 1][u_att, i_att]);
                            }
                        }
                    }
                }

                // adjust latent factors
                double normalized_error = err / norm_denominator;
                for (int f = 0; f < NumFactors; f++)
                {
                    float i_f = q_plus_x_sum_vector[f];

                    // if necessary, compute and apply updates
                    if (update_user)
                    {
                        double delta_u = err * i_f - user_reg_weight * p[u, f];
                        p.Inc(u, f, current_learnrate * delta_u);
                    }

                    if (update_item)
                    {
                        double common_update = normalized_error * i_f;
                        foreach (int other_item_id in items_rated_by_user[u])
                        {
                            double delta_oi = common_update - y_reg[other_item_id] * y[other_item_id, f];
                            y.Inc(other_item_id, f, current_learnrate * delta_oi);
                        }

                        double delta_i = err * p_plus_y_sum_vector[f] - item_reg_weight * q[i, f];
                        q.Inc(i, f, current_learnrate * delta_i);

                        // adjust attributes
                        if (i < item_attributes.NumberOfRows)
                        {
                            IList <int> attribute_list = item_attributes.GetEntriesByRow(i);
                            if (attribute_list.Count > 0)
                            {
                                double second_norm_denominator = attribute_list.Count;
                                double second_norm_error       = err / second_norm_denominator;

                                foreach (int attribute_id in attribute_list)
                                {
                                    double delta_oi = second_norm_error * p_plus_y_sum_vector[f] - x_reg[attribute_id] * x[attribute_id, f];
                                    x.Inc(attribute_id, f, current_learnrate * delta_oi);
                                }
                            }
                        }
                    }
                }
            }

            UpdateLearnRate();
        }
예제 #17
0
        /// <summary>Evaluation for rankings of items recommended to groups</summary>
        /// <remarks>
        /// Computes AUC, MAP, NDCG, MRR, precision@5/10 and recall@5/10, summed over all
        /// evaluated groups; groups with zero relevant test items, or whose relevant items
        /// cover all evaluated candidate items, are skipped. A group's relevant items are
        /// the union of its members' test items, restricted to <paramref name="candidate_items"/>.
        /// Writes progress dots to standard error.
        /// </remarks>
        /// <param name="recommender">group recommender</param>
        /// <param name="test">test cases</param>
        /// <param name="train">training data</param>
        /// <param name="group_to_user">group to user relation</param>
        /// <param name="candidate_items">a collection of integers with all candidate items</param>
        /// <param name="ignore_overlap">if true, ignore items that appear for a group in the training set when evaluating for that group</param>
        /// <returns>a dictionary containing the evaluation results</returns>
        /// <exception cref="InvalidOperationException">if the recommender does not rank all candidate items</exception>
        public static ItemRecommendationEvaluationResults Evaluate(
			this GroupRecommender recommender,
			IPosOnlyFeedback test,
			IPosOnlyFeedback train,
			IBooleanMatrix group_to_user,
			ICollection<int> candidate_items,
			bool ignore_overlap = true)
        {
            var result = new ItemRecommendationEvaluationResults();

            int num_groups = 0;

            foreach (int group_id in group_to_user.NonEmptyRowIDs)
            {
                var users = group_to_user.GetEntriesByRow(group_id);

                // relevant items for the group: union of the members' test items,
                // restricted to the candidate set
                var correct_items = new HashSet<int>();
                foreach (int user_id in users)
                    correct_items.UnionWith(test.UserMatrix[user_id]);
                correct_items.IntersectWith(candidate_items);

                // candidate items any group member has already seen in training
                var candidate_items_in_train = new HashSet<int>();
                foreach (int user_id in users)
                    candidate_items_in_train.UnionWith(train.UserMatrix[user_id]);
                candidate_items_in_train.IntersectWith(candidate_items);
                // use the O(1) Count property instead of the LINQ Count() extension (CA1829)
                int num_eval_items = candidate_items.Count - (ignore_overlap ? candidate_items_in_train.Count : 0);

                // skip all groups that have 0 or #candidate_items relevant test items
                if (correct_items.Count == 0)
                    continue;
                if (num_eval_items - correct_items.Count == 0)
                    continue;

                IList<int> prediction_list = recommender.RankItems(users, candidate_items);
                if (prediction_list.Count != candidate_items.Count)
                    // specific exception type instead of bare Exception (CA2201);
                    // still caught by any caller catching Exception
                    throw new InvalidOperationException("Not all items have been ranked.");

                var ignore_items = ignore_overlap ? candidate_items_in_train : new HashSet<int>();

                double auc  = AUC.Compute(prediction_list, correct_items, ignore_items);
                double map  = PrecisionAndRecall.AP(prediction_list, correct_items, ignore_items);
                double ndcg = NDCG.Compute(prediction_list, correct_items, ignore_items);
                double rr   = ReciprocalRank.Compute(prediction_list, correct_items, ignore_items);
                var positions = new int[] { 5, 10 };
                var prec   = PrecisionAndRecall.PrecisionAt(prediction_list, correct_items, ignore_items, positions);
                var recall = PrecisionAndRecall.RecallAt(prediction_list, correct_items, ignore_items, positions);

                // NOTE(review): the loop is sequential here, so this lock is currently
                // redundant; kept for consistency with parallel evaluation variants —
                // confirm against the other Evaluate overloads before removing
                lock(result)
                {
                    num_groups++;
                    result["AUC"]       += (float) auc;
                    result["MAP"]       += (float) map;
                    result["NDCG"]      += (float) ndcg;
                    result["MRR"]       += (float) rr;
                    result["prec@5"]    += (float) prec[5];
                    result["prec@10"]   += (float) prec[10];
                    result["recall@5"]  += (float) recall[5];
                    result["recall@10"] += (float) recall[10];
                }

                // lightweight progress indication on stderr
                if (num_groups % 1000 == 0)
                    Console.Error.Write(".");
                if (num_groups % 60000 == 0)
                    Console.Error.WriteLine();
            }

            result["num_groups"] = num_groups;
            result["num_lists"]  = num_groups;
            result["num_items"]  = candidate_items.Count;

            return result;
        }