/// <summary>
/// Runs the training command: loads the training set (and the test set when
/// one is configured), fits a model, writes it to "ml.result" inside the
/// output directory and, if a test set was loaded, prints its RMSE.
/// </summary>
/// <param name="opts">Command-line options (input/test paths, output directory, trainer settings).</param>
/// <returns>Always 0.</returns>
private static int RunTrain(TrainOptions opts)
{
    var trainFile = opts.Input;
    var testFile = opts.Test;
    string modelFile = Path.Combine(opts.Output, "ml.result");

    MfProblem trainSet = IOHelper.LoadDataFromTextFile(trainFile);

    // The test set is optional; it stays null when no path was supplied.
    MfProblem testSet = null;
    if (!string.IsNullOrEmpty(testFile))
    {
        testSet = IOHelper.LoadDataFromTextFile(testFile);
    }

    var trainer = new MfTrainer();
    var settings = new MfTrainerOptions
    {
        NumberOfThreads = opts.Threads,
        ApproximationRank = opts.Factors,
        Eps = opts.Eps,
        LambdaRegularization = opts.Lambda,
        NonNegativeMatrixFactorization = opts.NonNegativeMatrixFactorization,
        NumberOfIterations = opts.NumberOfIterations,
        Verbose = opts.Verbose
    };
    MfModel model = trainer.Fit(trainSet, testSet, settings);

    var predictor = new MfPredictor(model);
    model.SaveModelToFile(modelFile);

    if (testSet == null)
    {
        return 0;
    }

    MfMetric metrics = predictor.Evaluate(testSet);
    Console.WriteLine($"RMSE: {metrics.RootMeanSquaredError}");
    return 0;
}
/// <summary>
/// Self-contained demo run: trains on "Data/training.ratings", evaluates on
/// "Data/test.ratings", prints RMSE and R-squared to the console and waits
/// for a key press before returning.
/// </summary>
private static void MainMain()
{
    var dataDirectory = "Data";
    var trainFile = Path.Combine(dataDirectory, "training.ratings");
    var testFile = Path.Combine(dataDirectory, "test.ratings");

    MfProblem trainSet = IOHelper.LoadDataFromTextFile(trainFile);
    MfProblem testSet = IOHelper.LoadDataFromTextFile(testFile);

    Console.WriteLine("Model training started.");
    var trainer = new MfTrainer(new ConsoleLogger());
    var settings = new MfTrainerOptions
    {
        Verbose = true,
        LambdaRegularization = 0.2f,
        NumberOfThreads = 16,
        NumberOfIterations = 8
    };
    var model = trainer.Fit(trainSet, testSet, settings);
    var predictor = new MfPredictor(model);

    Console.WriteLine("Prediction calculation started.");
    MfMetric metrics = predictor.Evaluate(testSet);
    Console.WriteLine($"RMSE: {metrics.RootMeanSquaredError}");
    Console.WriteLine($"RSquared: {metrics.RSquared}");

    Console.WriteLine("Press any key to close..");
    Console.ReadKey();
}
/// <summary>
/// Scores the model against a test set.
/// </summary>
/// <param name="testing">Ratings to evaluate against.</param>
/// <returns>A metric object built from the RMSE and the R-squared value, computed in that order.</returns>
public MfMetric Evaluate(MfProblem testing) =>
    new MfMetric(CalculateRmse(testing), CalculateRSquared(testing));
/// <summary>
/// Computes the coefficient of determination (R-squared) of the model's
/// predictions on a test set: 1 - SS_res / SS_tot, where SS_res is the sum of
/// squared prediction errors and SS_tot is the sum of squared deviations of
/// the observed ratings from their mean.
/// </summary>
/// <param name="testing">Test ratings; the first <c>Nnz</c> entries of <c>R</c> are used.</param>
/// <returns>
/// R-squared (1 for a perfect fit, 0 for a model no better than predicting the
/// mean, negative when worse). The result is NaN for an empty test set and
/// NaN or negative infinity when every observed rating is identical
/// (SS_tot is zero).
/// </returns>
private double CalculateRSquared(MfProblem testing)
{
    long count = testing.Nnz;

    // Accumulate in double so large test sets do not lose precision.
    double ratingSum = 0;
    for (long i = 0; i < count; ++i)
    {
        ratingSum += testing.R[i].R;
    }
    double mean = ratingSum / count;

    double residualSumOfSquares = 0;
    double totalSumOfSquares = 0;
    for (long i = 0; i < count; ++i)
    {
        var node = testing.R[i];

        double residual = Predict(node.U, node.V) - node.R;
        residualSumOfSquares += residual * residual;

        double deviation = node.R - mean;
        totalSumOfSquares += deviation * deviation;
    }

    return 1 - residualSumOfSquares / totalSumOfSquares;
}
/// <summary>
/// Loads a ratings file in which every data line holds
/// "userId itemId rating" separated by spaces or tabs.
/// </summary>
/// <remarks>
/// The file is read twice: once to count entries so the rating array can be
/// allocated at its final size, and once to parse them. Blank lines are
/// skipped. Numbers are parsed with the invariant culture so that "3.5" is
/// read the same way regardless of the machine's regional settings.
/// </remarks>
/// <param name="filePath">Path of the ratings file. A null or empty path yields an empty problem (<c>R</c> is null).</param>
/// <returns>
/// A problem whose <c>M</c> and <c>N</c> are the largest user and item id plus
/// one, <c>Nnz</c> is the number of parsed ratings and <c>R</c> holds them in
/// file order.
/// </returns>
/// <exception cref="FormatException">A non-blank line has fewer than three fields, a field is not a valid number, or an id is negative.</exception>
public static MfProblem LoadDataFromTextFile(string filePath)
{
    var problem = new MfProblem { M = 0, N = 0, Nnz = 0, R = null };
    if (string.IsNullOrEmpty(filePath))
    {
        return problem;
    }

    // First pass: count data lines (blank lines are not entries).
    using (var reader = new StreamReader(filePath))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            if (!string.IsNullOrWhiteSpace(line))
            {
                problem.Nnz += 1;
            }
        }
    }

    problem.R = new MfNode[problem.Nnz];

    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    const System.Globalization.NumberStyles integerStyle = System.Globalization.NumberStyles.Integer;
    const System.Globalization.NumberStyles floatStyle =
        System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands;
    char[] separators = { ' ', '\t' };

    // Second pass: parse each data line into a node and track the id ranges.
    using (var reader = new StreamReader(filePath))
    {
        long idx = 0;
        long lineNumber = 0;
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            ++lineNumber;
            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            string[] fields = line.Split(separators, StringSplitOptions.RemoveEmptyEntries);
            if (fields.Length < 3
                || !int.TryParse(fields[0], integerStyle, invariant, out int userId)
                || !int.TryParse(fields[1], integerStyle, invariant, out int filmId)
                || !float.TryParse(fields[2], floatStyle, invariant, out float rating)
                || userId < 0
                || filmId < 0)
            {
                throw new FormatException($"File \"{filePath}\" was invalid (line {lineNumber})");
            }

            // Dimensions are derived from the largest id seen (id + 1).
            if (userId + 1 > problem.M)
            {
                problem.M = userId + 1;
            }

            if (filmId + 1 > problem.N)
            {
                problem.N = filmId + 1;
            }

            problem.R[idx] = new MfNode() { R = rating, U = userId, V = filmId };
            ++idx;
        }
    }

    return problem;
}
/// <summary>
/// Root-mean-squared error of the model's predictions over the first
/// <c>Nnz</c> entries of the test set.
/// </summary>
/// <param name="testing">Test ratings to compare predictions against.</param>
/// <returns>Square root of the mean squared difference between observed and predicted ratings.</returns>
private double CalculateRmse(MfProblem testing)
{
    double squaredErrorSum = 0;

    var index = 0;
    while (index < testing.Nnz)
    {
        var entry = testing.R[index];

        // Residual is formed and squared in single precision, then widened.
        float residual = entry.R - Predict(entry.U, entry.V);
        squaredErrorSum += residual * residual;

        ++index;
    }

    double meanSquaredError = squaredErrorSum / testing.Nnz;
    return Math.Sqrt(meanSquaredError);
}
/// <summary>
/// Subtracts one rank-one term <c>wt[U] * ht[V]</c> from every stored test
/// rating and returns the RMSE of the resulting values.
/// </summary>
/// <remarks>
/// This method modifies <paramref name="testProblem"/>: each entry's
/// <c>R</c> is overwritten with the reduced value. The loop runs
/// sequentially; no parallel reduction is applied.
/// </remarks>
/// <param name="testProblem">Test entries whose <c>R</c> values are updated in place.</param>
/// <param name="wt">Factor values indexed by each entry's <c>U</c>.</param>
/// <param name="ht">Factor values indexed by each entry's <c>V</c>.</param>
/// <returns>Square root of the mean of the squared updated values over <c>Nnz</c> entries.</returns>
public static double CalculateRmseOneRow(this MfProblem testProblem, float[] wt, float[] ht)
{
    long count = testProblem.Nnz;
    var entries = testProblem.R;
    double squaredSum = 0;

    for (long i = 0; i < count; ++i)
    {
        float residual = entries[i].R - wt[entries[i].U] * ht[entries[i].V];
        entries[i].R = residual;
        squaredSum += residual * residual;
    }

    return Math.Sqrt(squaredSum / count);
}
/// <summary>
/// Replaces the contribution of one rank-one term in every stored test value
/// (subtracts <c>wt[U] * ht[V]</c>, adds back <c>oldWt[U] * oldHt[V]</c>) and
/// returns the RMSE of the resulting values.
/// </summary>
/// <remarks>
/// This method modifies <paramref name="testProblem"/>: each entry's
/// <c>R</c> is overwritten with the updated value. The squared values are
/// accumulated in double precision, matching the two-vector overload, so the
/// sum does not lose precision on large test sets.
/// </remarks>
/// <param name="testProblem">Test entries whose <c>R</c> values are updated in place.</param>
/// <param name="wt">New factor values indexed by each entry's <c>U</c>.</param>
/// <param name="ht">New factor values indexed by each entry's <c>V</c>.</param>
/// <param name="oldWt">Previous factor values indexed by each entry's <c>U</c>.</param>
/// <param name="oldHt">Previous factor values indexed by each entry's <c>V</c>.</param>
/// <returns>Square root of the mean of the squared updated values over <c>Nnz</c> entries.</returns>
public static double CalculateRmseOneRow(this MfProblem testProblem, float[] wt, float[] ht, float[] oldWt, float[] oldHt)
{
    long nnz = testProblem.Nnz;
    double squaredSum = 0;

    for (long idx = 0; idx < nnz; ++idx)
    {
        var user = testProblem.R[idx].U;
        var item = testProblem.R[idx].V;

        testProblem.R[idx].R -= wt[user] * ht[item] - oldWt[user] * oldHt[item];

        double residual = testProblem.R[idx].R;
        squaredSum += residual * residual;
    }

    return Math.Sqrt(squaredSum / nnz);
}
/// <summary>
/// Computes the RMSE of the full rank-<paramref name="k"/> prediction over
/// every entry of the test set without modifying it.
/// </summary>
/// <remarks>
/// The squared errors are accumulated in double precision; a single-precision
/// accumulator stops absorbing small terms once the sum grows large.
/// </remarks>
/// <param name="testProblem">Test entries; all elements of <c>R</c> are visited.</param>
/// <param name="w">Factor rows indexed as <c>w[t][U - 1]</c>.</param>
/// <param name="h">Factor rows indexed as <c>h[t][V - 1]</c>.</param>
/// <param name="k">Number of factor rows to sum over.</param>
/// <returns>Square root of the mean squared prediction error over <c>R.Length</c> entries.</returns>
public static double CalculateRmseOneRow(this MfProblem testProblem, float[][] w, float[][] h, long k)
{
    double squaredErrorSum = 0;

    foreach (var mfNode in testProblem.R)
    {
        // NOTE(review): ids are shifted by one here, while the overloads that
        // take single factor vectors index with U and V directly - confirm
        // which convention the factor matrices follow.
        float predictedValue = 0;
        for (var t = 0; t < k; t++)
        {
            predictedValue += w[t][mfNode.U - 1] * h[t][mfNode.V - 1];
        }

        double error = predictedValue - mfNode.R;
        squaredErrorSum += error * error;
    }

    return Math.Sqrt(squaredErrorSum / testProblem.R.Length);
}
/// <summary>
/// Trains a matrix-factorization model on the given ratings.
/// </summary>
/// <param name="trainingData">Ratings to factorize.</param>
/// <param name="testing">Optional test set handed to the solver for progress reporting.</param>
/// <param name="mfTrainerOptions">Trainer settings (rank, iterations, regularization, threads, ...).</param>
/// <returns>A model carrying the matrix dimensions, the rank and both factor matrices.</returns>
public MfModel Fit(MfProblem trainingData, MfProblem testing, MfTrainerOptions mfTrainerOptions)
{
    var ratings = SparseMatrix.CreateFromMfProblem(trainingData);

    // One factor row per rank: user side first, then item side.
    var rank = mfTrainerOptions.ApproximationRank;
    var userFactors = InitializeColumn(rank, ratings.Rows);
    var itemFactors = InitializeColumn(rank, ratings.Cols);

    // Only the solver itself is timed.
    var timer = Stopwatch.StartNew();
    CoordinateDescentCore(ratings, userFactors, itemFactors, testing, mfTrainerOptions);
    timer.Stop();
    _logger?.WriteLine($"Time taken is {timer.ElapsedMilliseconds} ms.");

    return new MfModel
    {
        M = ratings.Rows,
        N = ratings.Cols,
        K = rank,
        W = userFactors,
        H = itemFactors
    };
}
/// <summary>
/// Cyclic coordinate descent for matrix factorization. For every outer
/// iteration the factor rows are visited one rank at a time; each rank is
/// refined by alternating parallel updates of its H row and its W row, and
/// the stored ratings are then updated with the new rank-one term.
/// </summary>
/// <remarks>
/// Timing: the stopwatch readings are accumulated into three buckets (H
/// updates, W updates, rating updates) that are summed for the verbose log
/// line. The factor copy and residual restore at the start of each rank are
/// counted as rating-update time, and the W-update time is recorded before
/// the early-exit test so that it is not lost when the inner loop breaks.
/// </remarks>
/// <param name="r">Sparse rating matrix; modified through <c>UpdateRating</c>.</param>
/// <param name="w">Factor rows of length <c>r.Rows</c>, one per rank; updated in place.</param>
/// <param name="h">Factor rows of length <c>r.Cols</c>, one per rank; zeroed on entry and updated in place.</param>
/// <param name="testProblem">Optional test set; when non-null and verbose is on, its RMSE is logged after every outer iteration.</param>
/// <param name="options">Trainer settings read once at the start.</param>
private void CoordinateDescentCore(SparseMatrix r, float[][] w, float[][] h, MfProblem testProblem, MfTrainerOptions options)
{
    // A rank counts as an early stop when its very first inner iteration
    // already falls below the threshold; after this many, remaining ranks
    // are skipped for the current outer iteration.
    const long earlyStopLimit = 5;

    long k = options.ApproximationRank;
    long numberOfIterations = options.NumberOfIterations;
    long innerIterations = options.NumberOfInnerIterations;
    var lambda = options.LambdaRegularization;
    var eps = options.Eps;
    var doNmf = options.NonNegativeMatrixFactorization;
    var verbose = options.Verbose;
    var parallelOptions = new ParallelOptions() { MaxDegreeOfParallelism = options.NumberOfThreads };

    // Accumulated elapsed milliseconds per phase.
    float wTime = 0, hTime = 0, rTime = 0;

    // Create transpose view of R
    var rt = r.Transpose();
    var stopwatch = new Stopwatch();

    // H starts as a zero matrix, whatever the caller passed in.
    for (long feature = 0; feature < k; ++feature)
    {
        for (long column = 0; column < r.Cols; ++column)
        {
            h[feature][column] = 0;
        }
    }

    // Working copies of the factor rows currently being refined.
    var u = new float[r.Rows];
    var v = new float[r.Cols];

    for (long outerIteration = 1; outerIteration <= numberOfIterations; ++outerIteration)
    {
        float fundecMax = 0;
        long earlyStop = 0;

        for (long t = 0; t < k; ++t)
        {
            if (earlyStop >= earlyStopLimit)
            {
                break;
            }

            stopwatch.Restart();
            float[] wt = w[t], ht = h[t];
            for (int i = 0; i < r.Rows; i++)
            {
                u[i] = wt[i];
            }

            for (int i = 0; i < r.Cols; i++)
            {
                v[i] = ht[i];
            }

            // From the second outer iteration on, the current rank's term is
            // applied to R and Rt with the flag set to true before refining it.
            // NOTE(review): presumably this adds Wt Ht^T back into the
            // residual - confirm against UpdateRating.
            if (outerIteration > 1)
            {
                UpdateRating(r, wt, ht, true, parallelOptions);
                UpdateRating(rt, ht, wt, true, parallelOptions);
            }

            stopwatch.Stop();
            rTime += stopwatch.ElapsedMilliseconds;

            for (long iteration = 1; iteration <= innerIterations; ++iteration)
            {
                var innerFunDecCur = 0f;

                // Shared total for this inner iteration; each worker keeps a
                // thread-local float and merges it under the lock when done.
                var innerFun = new ThreadSafe();

                // Update H[t]
                stopwatch.Restart();
                Parallel.For(0, r.Cols, parallelOptions, () => 0f, (c, loopState, localTotal) =>
                {
                    v[c] = RankOneUpdate(r, c, u, (lambda * (r.ColPtr[c + 1] - r.ColPtr[c])), v[c], doNmf, ref localTotal);
                    return localTotal;
                }, localTotal =>
                {
                    lock (innerFun)
                    {
                        innerFun.AddToTotal(localTotal);
                    }
                });
                stopwatch.Stop();
                hTime += stopwatch.ElapsedMilliseconds;

                // Update W[t]
                stopwatch.Restart();
                Parallel.For(0, rt.Cols, parallelOptions, () => 0f, (c, loopState, localTotal) =>
                {
                    u[c] = RankOneUpdate(rt, c, v, (lambda * (rt.ColPtr[c + 1] - rt.ColPtr[c])), u[c], doNmf, ref localTotal);
                    return localTotal;
                }, localTotal =>
                {
                    lock (innerFun)
                    {
                        innerFun.AddToTotal(localTotal);
                    }
                });
                stopwatch.Stop();
                wTime += stopwatch.ElapsedMilliseconds;

                innerFunDecCur += innerFun.Total;

                // Stop refining this rank once the accumulated total drops
                // below eps times the largest total seen in this outer iteration.
                if (innerFunDecCur < fundecMax * eps)
                {
                    if (iteration == 1)
                    {
                        earlyStop += 1;
                    }

                    break;
                }

                // the fundec of the first inner iter of the first rank of the first outer iteration could be too large!!
                if (!(outerIteration == 1 && t == 0 && iteration == 1))
                {
                    fundecMax = Math.Max(fundecMax, innerFunDecCur);
                }
            }

            // Update R and Rt
            stopwatch.Restart();
            for (int i = 0; i < r.Rows; i++)
            {
                wt[i] = u[i];
            }

            for (int i = 0; i < r.Cols; i++)
            {
                ht[i] = v[i];
            }

            UpdateRating(r, u, v, false, parallelOptions);
            UpdateRating(rt, v, u, false, parallelOptions);
            stopwatch.Stop();
            rTime += stopwatch.ElapsedMilliseconds;
        }

        if (testProblem != null && verbose)
        {
            _logger?.Write("iter {0, 5} time {1, 5} rmse {2, 5}", outerIteration, hTime + wTime + rTime, testProblem.CalculateRmseOneRow(w, h, k));
        }
    }
}