/// <summary>
/// Evaluates <c>CollaborativeFilteringRSCostFunction</c> on a small slice of the tables
/// loaded from "X.txt", "Y.txt", "R.txt" and "Theta.txt", then prints the cost.
/// </summary>
/// <param name="lambda">Value assigned to the cost function's <c>RegularizationLambda</c>.</param>
public static void Test_CollaborativeFilteringRSCostFunction_Evaluate(double lambda)
{
    // Size of the slice cut out of each loaded table.
    int userCount = 4;
    int movieCount = 5;
    int featureCount = 3;

    // Load the full tables first ...
    List<List<double>> xTable = DblDataTableUtil.LoadDataSet("X.txt");
    List<List<double>> yTable = DblDataTableUtil.LoadDataSet("Y.txt");
    List<List<int>> rTable = IntDataTableUtil.LoadDataSet("R.txt");
    List<List<double>> thetaTable = DblDataTableUtil.LoadDataSet("Theta.txt");

    // ... then reduce each one to the slice used by this check.
    xTable = DblDataTableUtil.SubMatrix(xTable, movieCount, featureCount);
    yTable = DblDataTableUtil.SubMatrix(yTable, movieCount, userCount);
    rTable = IntDataTableUtil.SubMatrix(rTable, movieCount, userCount);
    thetaTable = DblDataTableUtil.SubMatrix(thetaTable, userCount, featureCount);

    Matrix<double> yMatrix = Convert2Matrix(yTable);
    Matrix<double> xMatrix = Convert2Matrix(xTable);
    Matrix<double> thetaMatrix = Convert2Matrix(thetaTable);
    int[,] rFlags = IntDataTableUtil.Convert2DArray(rTable);

    // Total number of entries in X and Theta combined.
    int parameterCount = (movieCount + userCount) * featureCount;

    double[] unrolled = new double[parameterCount];
    CollaborativeFilteringRSCostFunction.UnrollMatrixIntoVector(thetaMatrix, xMatrix, unrolled);

    CollaborativeFilteringRSCostFunction costFunction =
        new CollaborativeFilteringRSCostFunction(yMatrix, rFlags, movieCount, featureCount, parameterCount)
        {
            RegularizationLambda = lambda
        };

    double cost = costFunction.Evaluate(unrolled);
    Console.WriteLine("Cost at loaded parameters: {0} (this value should be about 22.22)", cost);
}
/// <summary>
/// Loads feature rows and labels from two files and lists them in <c>lvTabularData</c>.
/// </summary>
/// <remarks>
/// The split percentages are reset to 100 / 0 / 0 (training / testing / cross validation)
/// and the list view is emptied before the files are read. When the feature file yields
/// no rows, the view ends up with only the fixed columns and no items.
/// </remarks>
/// <param name="X_filepath">Path handed to <c>MLDataPointUtil.LoadDataSet</c> for the feature rows.</param>
/// <param name="y_filepath">
/// Path handed to <c>IntDataTableUtil.LoadDataSet</c>; the first value of row i becomes
/// the label of record i.
/// </param>
private void ImportData(string X_filepath, string y_filepath)
{
    mTrainingPercent = 100;
    mTestingPercent = 0;
    mCrossValidationPercent = 0;

    lvTabularData.Items.Clear();
    lvTabularData.Columns.Clear();

    List<MLDataPoint> X_points = MLDataPointUtil.LoadDataSet(X_filepath);
    List<List<int>> y_points = IntDataTableUtil.LoadDataSet(y_filepath);

    // Convert every loaded point into a labelled record.
    List<CDataRecord> X = new List<CDataRecord>(X_points.Count);
    for (int i = 0; i < X_points.Count; ++i)
    {
        CDataRecord X_i = new CDataRecord(X_points[i].Dimension);
        for (int j = 0; j < X_points[i].Dimension; ++j)
        {
            // Feature values are stored from index 1 upward.
            X_i[j + 1] = X_points[i][j];
        }
        X_i.Label = y_points[i][0].ToString();
        X.Add(X_i);
    }

    // Column layout: "#", one column per record entry, then the three fixed columns.
    // An empty data set has no first record to take the width from, so it gets no
    // feature columns instead of failing on X[0].
    int feature_column_count = X.Count > 0 ? X[0].Dimension : 0;

    lvTabularData.Columns.Add("#");
    for (int i = 0; i < feature_column_count; ++i)
    {
        lvTabularData.Columns.Add(string.Format("X[{0}]", i));
    }
    lvTabularData.Columns.Add("Y");
    lvTabularData.Columns.Add("Data Set");
    lvTabularData.Columns.Add("Predicted Y");

    // One list-view row per record; the record itself travels along in Tag.
    int m = X.Count;
    for (int i = 0; i < m; ++i)
    {
        CDataRecord X_i = X[i];

        ListViewItem item = new ListViewItem();
        item.Text = (lvTabularData.Items.Count + 1).ToString();

        // NOTE(review): values were written at indices 1..n above but are read here at
        // 0..Dimension-1 — confirm against CDataRecord that this is the intended range.
        for (int j = 0; j < X_i.Dimension; ++j)
        {
            item.SubItems.Add(X_i[j].ToString());
        }

        item.SubItems.Add(X_i.Label.ToString());
        item.SubItems.Add(X_i.DataSetType.ToString());
        item.SubItems.Add(X_i.PredictedLabel.ToString());

        item.Tag = X_i;
        item.ForeColor = Color.Green;

        lvTabularData.Items.Add(item);
    }
}
/// <summary>
/// Builds labelled records from "X1.txt" and "y1.txt" and trains a
/// <c>LinearSVM</c> with <c>C = 100</c> on them.
/// </summary>
public static void Run_Classify2()
{
    List<MLDataPoint> X_points = MLDataPointUtil.LoadDataSet("X1.txt");
    List<List<int>> y_points = IntDataTableUtil.LoadDataSet("y1.txt");

    List<CDataRecord> X = new List<CDataRecord>(X_points.Count);
    for (int i = 0; i < X_points.Count; ++i)
    {
        CDataRecord X_i = new CDataRecord(X_points[i].Dimension);
        for (int j = 0; j < X_points[i].Dimension; ++j)
        {
            // X_i index must start at 1. The slot is selected by the feature index j;
            // using the row index i here would write every feature of a row into the
            // same slot and run past the record for later rows.
            X_i[j + 1] = X_points[i][j];
        }

        // The first value of the matching label row is the record's label.
        X_i.Label = y_points[i][0].ToString();
        X.Add(X_i);
    }

    LinearSVM<CDataRecord> algorithm = new LinearSVM<CDataRecord>();
    algorithm.C = 100;
    algorithm.Train(X);
}
/// <summary>
/// Demo run of <c>CollaborativeFilteringRS</c>: a hand-picked set of ratings is inserted
/// as an extra first user into the ratings loaded from "Y.txt" / "R.txt", the model is
/// computed, and the highest-ranked items for that user are printed.
/// </summary>
public static void Test_Compute()
{
    List<string> movie_titles = LoadMovies();
    int num_movies = movie_titles.Count;

    // Step 1: create my ratings with missing entries.
    // A new array is zero-filled, and 0 is treated as "not rated" further down.
    // The fixed indices below require at least 356 loaded titles.
    double[] my_ratings = new double[num_movies];
    my_ratings[1] = 4;
    my_ratings[98] = 2;
    my_ratings[7] = 3;
    my_ratings[12] = 5;
    my_ratings[54] = 4;
    my_ratings[64] = 5;
    my_ratings[66] = 3;
    my_ratings[69] = 5;
    my_ratings[183] = 4;
    my_ratings[226] = 5;
    my_ratings[355] = 5;

    // Step 2: load the current ratings of all users, i.e., Y and R
    List<List<double>> Y = DblDataTableUtil.LoadDataSet("Y.txt");
    List<List<int>> R = IntDataTableUtil.LoadDataSet("R.txt");

    int num_users;
    DblDataTableUtil.GetSize(Y, out num_movies, out num_users);

    // Step 3: insert my ratings into the existing Y and R (as the first column)
    num_users++;
    List<RatedItem> records = new List<RatedItem>(num_movies);
    for (int i = 0; i < num_movies; ++i)
    {
        double[] rec_Y = new double[num_users];
        bool[] rec_R = new bool[num_users];

        // Column 0 is the new user; a rating counts as present when it is positive.
        rec_Y[0] = my_ratings[i];
        rec_R[0] = my_ratings[i] > 0;

        // Remaining columns are the loaded users, shifted right by one.
        for (int j = 1; j < num_users; ++j)
        {
            rec_Y[j] = Y[i][j - 1];
            rec_R[j] = R[i][j - 1] == 1;
        }

        records.Add(new RatedItem(null, rec_Y, rec_R));
    }

    int num_features = 10;
    double lambda = 10;

    CollaborativeFilteringRS<RatedItem> algorithm = new CollaborativeFilteringRS<RatedItem>();
    algorithm.Stepped += (s, step) =>
    {
        Console.WriteLine("#{0}: {1}", step, s.Cost);
    };
    algorithm.RegularizationLambda = lambda;
    algorithm.MaxLocalSearchIteration = 100;

    // Alpha is set through the GradientDescent type. "as" yields null when the
    // configured local search is something else, so only touch it when the cast worked.
    GradientDescent local_search = algorithm.LocalSearch as GradientDescent;
    if (local_search != null)
    {
        local_search.Alpha = 0.005;
    }

    // Normalization is applied before Compute and undone again afterwards.
    double[] Ymean;
    algorithm.DoMeanNormalization(records, out Ymean);
    algorithm.Compute(records, num_features);
    algorithm.UndoMeanNormalization(records, Ymean);

    // The inserted ratings live in column 0, so that is the user to report on.
    int userId = 0;
    int topK = 10;
    List<RatedItem> highest_ranks = algorithm.SelectHigestRanked(userId, records, topK);
    for (int i = 0; i < highest_ranks.Count; ++i)
    {
        RatedItem rec = highest_ranks[i];
        Console.WriteLine("#{0}: ({1}) {2}", i + 1, rec.UserRanks[userId], movie_titles[rec.ItemIndex]);
    }
}