Exemple #1
0
        /// <summary>
        /// Loads X, Y, R and Theta from text files, trims them to a small
        /// sub-problem, evaluates the collaborative-filtering cost function at the
        /// loaded parameters and prints the resulting cost to the console.
        /// </summary>
        /// <param name="lambda">Value assigned to the cost function's RegularizationLambda before evaluation.</param>
        public static void Test_CollaborativeFilteringRSCostFunction_Evaluate(double lambda)
        {
            // Size of the reduced problem cut out of the loaded tables.
            int userCount    = 4;
            int movieCount   = 5;
            int featureCount = 3;

            // Load the full tables (same order as the files are listed below).
            List<List<double>> xTable     = DblDataTableUtil.LoadDataSet("X.txt");
            List<List<double>> yTable     = DblDataTableUtil.LoadDataSet("Y.txt");
            List<List<int>>    rTable     = IntDataTableUtil.LoadDataSet("R.txt");
            List<List<double>> thetaTable = DblDataTableUtil.LoadDataSet("Theta.txt");

            // Keep only the leading sub-matrix of each table.
            xTable     = DblDataTableUtil.SubMatrix(xTable, movieCount, featureCount);
            yTable     = DblDataTableUtil.SubMatrix(yTable, movieCount, userCount);
            rTable     = IntDataTableUtil.SubMatrix(rTable, movieCount, userCount);
            thetaTable = DblDataTableUtil.SubMatrix(thetaTable, userCount, featureCount);

            Matrix<double> yMatrix     = Convert2Matrix(yTable);
            Matrix<double> xMatrix     = Convert2Matrix(xTable);
            Matrix<double> thetaMatrix = Convert2Matrix(thetaTable);

            int[,] rMask = IntDataTableUtil.Convert2DArray(rTable);

            // Total number of entries in X and Theta combined.
            int parameterCount = (movieCount + userCount) * featureCount;

            // Flatten Theta and X into one parameter vector for the cost function.
            double[] unrolled = new double[parameterCount];
            CollaborativeFilteringRSCostFunction.UnrollMatrixIntoVector(thetaMatrix, xMatrix, unrolled);

            CollaborativeFilteringRSCostFunction costFunction =
                new CollaborativeFilteringRSCostFunction(yMatrix, rMask, movieCount, featureCount, parameterCount);
            costFunction.RegularizationLambda = lambda;

            double cost = costFunction.Evaluate(unrolled);

            Console.WriteLine("Cost at loaded parameters: {0} (this value should be about 22.22)", cost);
        }
Exemple #2
0
        /// <summary>
        /// Loads feature rows and their labels from two files, converts them into
        /// <c>CDataRecord</c> instances and shows them in <c>lvTabularData</c>,
        /// one list-view row per record.
        /// </summary>
        /// <param name="X_filepath">Path of the file holding the feature rows.</param>
        /// <param name="y_filepath">Path of the file holding the labels; the first value of each row is used as the label.</param>
        private void ImportData(string X_filepath, string y_filepath)
        {
            // Everything that is imported is treated as training data.
            mTrainingPercent        = 100;
            mTestingPercent         = 0;
            mCrossValidationPercent = 0;

            // Suspend repainting while the list view is rebuilt; EndUpdate is
            // guaranteed by the finally block even if loading fails.
            lvTabularData.BeginUpdate();
            try
            {
                lvTabularData.Items.Clear();
                lvTabularData.Columns.Clear();

                List <MLDataPoint> X_points = MLDataPointUtil.LoadDataSet(X_filepath);
                List <List <int> > y_points = IntDataTableUtil.LoadDataSet(y_filepath);

                List <CDataRecord> X = new List <CDataRecord>(X_points.Count);

                for (int i = 0; i < X_points.Count; ++i)
                {
                    int dimension = X_points[i].Dimension;

                    CDataRecord X_i = new CDataRecord(dimension);
                    for (int j = 0; j < dimension; ++j)
                    {
                        // Features are stored starting at record index 1.
                        X_i[j + 1] = X_points[i][j];
                    }
                    X_i.Label = y_points[i][0].ToString();
                    X.Add(X_i);
                }

                // An empty data file yields no records; in that case there are no
                // feature columns (previously X[0] threw on an empty list).
                int feature_count = X.Count > 0 ? X[0].Dimension : 0;

                lvTabularData.Columns.Add("#");
                for (int i = 0; i < feature_count; ++i)
                {
                    lvTabularData.Columns.Add(string.Format("X[{0}]", i));
                }
                lvTabularData.Columns.Add("Y");
                lvTabularData.Columns.Add("Data Set");
                lvTabularData.Columns.Add("Predicted Y");

                for (int i = 0; i < X.Count; ++i)
                {
                    CDataRecord X_i = X[i];

                    ListViewItem item = new ListViewItem();
                    item.Text = (lvTabularData.Items.Count + 1).ToString();
                    for (int j = 0; j < X_i.Dimension; ++j)
                    {
                        // NOTE(review): values were written at index j + 1 above but are
                        // read at index j here - confirm against the CDataRecord indexer.
                        item.SubItems.Add(X_i[j].ToString());
                    }

                    item.SubItems.Add(X_i.Label.ToString());

                    item.SubItems.Add(X_i.DataSetType.ToString());
                    item.SubItems.Add(X_i.PredictedLabel.ToString());

                    // Keep the record reachable from its list-view row.
                    item.Tag = X_i;

                    item.ForeColor = Color.Green;
                    lvTabularData.Items.Add(item);
                }
            }
            finally
            {
                lvTabularData.EndUpdate();
            }
        }
Exemple #3
0
        /// <summary>
        /// Loads the feature rows from "X1.txt" and the labels from "y1.txt",
        /// converts them into <c>CDataRecord</c> instances and trains a
        /// <c>LinearSVM</c> with C = 100 on them.
        /// </summary>
        public static void Run_Classify2()
        {
            List <MLDataPoint> X_points = MLDataPointUtil.LoadDataSet("X1.txt");
            List <List <int> > y_points = IntDataTableUtil.LoadDataSet("y1.txt");

            List <CDataRecord> X = new List <CDataRecord>(X_points.Count);

            for (int i = 0; i < X_points.Count; ++i)
            {
                int dimension = X_points[i].Dimension;

                CDataRecord X_i = new CDataRecord(dimension);
                for (int j = 0; j < dimension; ++j)
                {
                    // X_i index must start at 1. The slot is selected by the feature
                    // index j; using the row index i here made every feature of a
                    // row overwrite the same slot.
                    X_i[j + 1] = X_points[i][j];
                }

                // The first value of the matching label row is the class label.
                X_i.Label = y_points[i][0].ToString();
                X.Add(X_i);
            }

            LinearSVM <CDataRecord> algorithm = new LinearSVM <CDataRecord>();

            algorithm.C = 100;

            algorithm.Train(X);
        }
Exemple #4
0
        /// <summary>
        /// Demo run of the collaborative-filtering recommender: builds a ratings
        /// vector for a new user, prepends it as the first column of the ratings
        /// loaded from "Y.txt" / "R.txt", runs the algorithm and prints the
        /// highest-ranked items for that user.
        /// </summary>
        public static void Test_Compute()
        {
            List <string> movie_titles = LoadMovies();
            int           num_movies   = movie_titles.Count;

            // Step 1: create my ratings with missing entries.
            // New arrays are zero-initialized, so every unset entry stays 0 (= not rated).
            // The hard-coded indices below require at least 356 movies in the list.
            double[] my_ratings = new double[num_movies];

            my_ratings[1]   = 4;
            my_ratings[98]  = 2;
            my_ratings[7]   = 3;
            my_ratings[12]  = 5;
            my_ratings[54]  = 4;
            my_ratings[64]  = 5;
            my_ratings[66]  = 3;
            my_ratings[69]  = 5;
            my_ratings[183] = 4;
            my_ratings[226] = 5;
            my_ratings[355] = 5;

            // Step 2: load the current ratings of all users, i.e., Y and R
            List <List <double> > Y = DblDataTableUtil.LoadDataSet("Y.txt");
            List <List <int> >    R = IntDataTableUtil.LoadDataSet("R.txt");

            int num_users;

            // From here on num_movies is the size reported for Y, not the title count.
            DblDataTableUtil.GetSize(Y, out num_movies, out num_users);

            // Step 3: insert my ratings into the existing Y and R (as the first column)
            num_users++;
            List <RatedItem> records = new List <RatedItem>(num_movies);

            for (int i = 0; i < num_movies; ++i)
            {
                double[] rec_Y = new double[num_users];
                bool[]   rec_R = new bool[num_users];
                for (int j = 0; j < num_users; ++j)
                {
                    if (j == 0)
                    {
                        // Column 0 is the new user; a rating > 0 marks the item as rated.
                        rec_Y[j] = my_ratings[i];
                        rec_R[j] = my_ratings[i] > 0;
                    }
                    else
                    {
                        // Existing users are shifted one column to the right.
                        rec_Y[j] = Y[i][j - 1];
                        rec_R[j] = R[i][j - 1] == 1;
                    }
                }
                RatedItem rec = new RatedItem(null, rec_Y, rec_R);
                records.Add(rec);
            }

            int num_features = 10;

            double lambda = 10;
            CollaborativeFilteringRS <RatedItem> algorithm = new CollaborativeFilteringRS <RatedItem>();

            // Report the cost after every step of the run.
            algorithm.Stepped += (s, step) =>
            {
                Console.WriteLine("#{0}: {1}", step, s.Cost);
            };
            algorithm.RegularizationLambda    = lambda;
            algorithm.MaxLocalSearchIteration = 100;

            // Alpha is only set when the configured local search is a GradientDescent;
            // the unchecked "as" cast previously caused a NullReferenceException otherwise.
            GradientDescent local_search = algorithm.LocalSearch as GradientDescent;
            if (local_search != null)
            {
                local_search.Alpha = 0.005;
            }

            double[] Ymean;
            algorithm.DoMeanNormalization(records, out Ymean);

            algorithm.Compute(records, num_features);

            algorithm.UndoMeanNormalization(records, Ymean);

            // The new user was inserted as column 0 in step 3.
            int userId = 0;
            int topK   = 10;
            List <RatedItem> highest_ranks = algorithm.SelectHigestRanked(userId, records, topK);

            for (int i = 0; i < highest_ranks.Count; ++i)
            {
                RatedItem rec = highest_ranks[i];
                Console.WriteLine("#{0}: ({1}) {2}", i + 1, rec.UserRanks[0], movie_titles[rec.ItemIndex]);
            }
        }