Пример #1
0
        /// <summary>
        /// Trains a matrix-factorization model from the files named in <paramref name="opts"/>,
        /// saves it as "ml.result" under the output directory and, when a test file was
        /// supplied, prints the model's RMSE on that file.
        /// </summary>
        /// <param name="opts">Parsed options for the training run.</param>
        /// <returns>Always 0.</returns>
        private static int RunTrain(TrainOptions opts)
        {
            var trainFile = opts.Input;
            var testFile = opts.Test;
            string modelFile = Path.Combine(opts.Output, "ml.result");

            MfProblem trainSet = IOHelper.LoadDataFromTextFile(trainFile);

            // The test set is optional; without it the evaluation step below is skipped.
            MfProblem testSet = null;
            if (!string.IsNullOrEmpty(testFile))
            {
                testSet = IOHelper.LoadDataFromTextFile(testFile);
            }

            var trainer = new MfTrainer();
            var trainerOptions = new MfTrainerOptions
            {
                NumberOfThreads = opts.Threads,
                ApproximationRank = opts.Factors,
                Eps = opts.Eps,
                LambdaRegularization = opts.Lambda,
                NonNegativeMatrixFactorization = opts.NonNegativeMatrixFactorization,
                NumberOfIterations = opts.NumberOfIterations,
                Verbose = opts.Verbose
            };
            MfModel trained = trainer.Fit(trainSet, testSet, trainerOptions);

            // The predictor is built before the model is written out, as in the original flow.
            var predictor = new MfPredictor(trained);
            trained.SaveModelToFile(modelFile);

            if (testSet == null)
            {
                return 0;
            }

            MfMetric metrics = predictor.Evaluate(testSet);
            Console.WriteLine($"RMSE: {metrics.RootMeanSquaredError}");
            return 0;
        }
Пример #2
0
        /// <summary>
        /// Demo driver: loads the training and test rating files from the "Data" folder,
        /// trains a model with fixed options, prints RMSE and R-squared for the test set,
        /// then waits for a key press.
        /// </summary>
        private static void MainMain()
        {
            const string dataFolder = "Data";
            string trainFile = Path.Combine(dataFolder, "training.ratings");
            string testFile = Path.Combine(dataFolder, "test.ratings");

            MfProblem trainSet = IOHelper.LoadDataFromTextFile(trainFile);
            MfProblem testSet = IOHelper.LoadDataFromTextFile(testFile);

            Console.WriteLine("Model training started.");

            var trainer = new MfTrainer(new ConsoleLogger());
            var trainerOptions = new MfTrainerOptions
            {
                Verbose = true,
                LambdaRegularization = 0.2f,
                NumberOfThreads = 16,
                NumberOfIterations = 8
            };
            var trained = trainer.Fit(trainSet, testSet, trainerOptions);

            var predictor = new MfPredictor(trained);

            Console.WriteLine("Prediction calculation started.");

            MfMetric result = predictor.Evaluate(testSet);

            Console.WriteLine($"RMSE: {result.RootMeanSquaredError}");
            Console.WriteLine($"RSquared: {result.RSquared}");
            Console.WriteLine("Press any key to close..");
            Console.ReadKey();
        }
Пример #3
0
        /// <summary>
        /// Scores the model on <paramref name="testing"/> and bundles the results.
        /// </summary>
        /// <param name="testing">Rating set to evaluate against.</param>
        /// <returns>
        /// An <see cref="MfMetric"/> built from the RMSE and the R-squared value, in that order.
        /// </returns>
        public MfMetric Evaluate(MfProblem testing)
        {
            // Arguments are evaluated left to right, so RMSE is still computed first.
            return new MfMetric(CalculateRmse(testing), CalculateRSquared(testing));
        }
Пример #4
0
        /// <summary>
        /// Computes the coefficient of determination (R²) of the model's predictions on
        /// <paramref name="testing"/>: <c>1 - SS_res / SS_tot</c>, where <c>SS_res</c> is the
        /// sum of squared prediction errors and <c>SS_tot</c> is the sum of squared deviations
        /// of the observed ratings from their mean.
        /// </summary>
        /// <param name="testing">Rating set to evaluate; its first <c>Nnz</c> nodes are used.</param>
        /// <returns>
        /// The R² value. The result is NaN or negative infinity when <c>SS_tot</c> is zero
        /// (empty set, or all observed ratings identical), because R² is undefined there.
        /// </returns>
        private double CalculateRSquared(MfProblem testing)
        {
            var count = testing.Nnz;

            // Mean of the observed ratings over the same nodes the loop below visits,
            // accumulated in double to avoid single-precision drift on large sets.
            double ratingSum = 0;
            for (var i = 0; i < count; ++i)
            {
                ratingSum += testing.R[i].R;
            }

            double mean = ratingSum / count;

            double residualSumOfSquares = 0;
            double totalSumOfSquares = 0;

            for (var i = 0; i < count; ++i)
            {
                var node = testing.R[i];
                double residual = Predict(node.U, node.V) - node.R;
                double deviation = node.R - mean;

                residualSumOfSquares += residual * residual;
                totalSumOfSquares += deviation * deviation;
            }

            // The previous code returned "SS_res / SS_tot - 0.2", which is not R².
            return 1.0 - residualSumOfSquares / totalSumOfSquares;
        }
Пример #5
0
        /// <summary>
        /// Reads a whitespace-separated ratings file where each non-blank line holds
        /// <c>rowId columnId rating</c> (extra trailing fields are ignored) and returns it
        /// as an <see cref="MfProblem"/>.
        /// </summary>
        /// <remarks>
        /// The file is read twice: once to count the ratings so the node array can be
        /// allocated at its exact size, and once to parse them. Numbers are parsed with the
        /// invariant culture so that a file containing "3.5" loads the same way regardless of
        /// the machine's regional settings. Blank lines are skipped.
        /// </remarks>
        /// <param name="filePath">Path of the ratings file.</param>
        /// <returns>
        /// The loaded problem; <c>M</c> and <c>N</c> are set to the largest ids seen plus one.
        /// When <paramref name="filePath"/> is null or empty, an empty problem whose
        /// <c>R</c> is null is returned.
        /// </returns>
        /// <exception cref="FormatException">
        /// A non-blank line has fewer than three fields, or a field is not a valid number.
        /// </exception>
        public static MfProblem LoadDataFromTextFile(string filePath)
        {
            var problem = new MfProblem {
                M = 0, N = 0, Nnz = 0, R = null
            };

            if (string.IsNullOrEmpty(filePath))
            {
                return problem;
            }

            // First pass: count the ratings (non-blank lines only).
            foreach (var line in File.ReadLines(filePath))
            {
                if (!string.IsNullOrWhiteSpace(line))
                {
                    problem.Nnz += 1;
                }
            }

            problem.R = new MfNode[problem.Nnz];

            var culture = System.Globalization.CultureInfo.InvariantCulture;
            var separators = new[] { ' ', '\t' };
            long idx = 0;
            long lineNumber = 0;

            // Second pass: parse every rating and track the largest ids seen.
            foreach (var line in File.ReadLines(filePath))
            {
                ++lineNumber;
                if (string.IsNullOrWhiteSpace(line))
                {
                    continue;
                }

                var fields = line.Split(separators, StringSplitOptions.RemoveEmptyEntries);
                if (fields.Length < 3)
                {
                    throw new FormatException(
                        $"File \"{filePath}\" was invalid: line {lineNumber} has fewer than three fields");
                }

                var userId = int.Parse(fields[0], culture);
                var filmId = int.Parse(fields[1], culture);
                var rating = float.Parse(fields[2], culture);

                if (userId + 1 > problem.M)
                {
                    problem.M = userId + 1;
                }

                if (filmId + 1 > problem.N)
                {
                    problem.N = filmId + 1;
                }

                problem.R[idx] = new MfNode()
                {
                    R = rating,
                    U = userId,
                    V = filmId
                };

                ++idx;
            }

            return problem;
        }
Пример #6
0
        /// <summary>
        /// Computes the root-mean-squared error of the model's predictions over the first
        /// <c>Nnz</c> nodes of <paramref name="testing"/>.
        /// </summary>
        /// <param name="testing">Rating set to evaluate against.</param>
        /// <returns>Square root of the summed squared errors divided by <c>Nnz</c>.</returns>
        private double CalculateRmse(MfProblem testing)
        {
            double squaredErrorSum = 0;

            var index = 0;
            while (index < testing.Nnz)
            {
                var node = testing.R[index];

                // Observed rating minus predicted rating, kept in single precision.
                float residual = node.R - Predict(node.U, node.V);
                squaredErrorSum += residual * residual;

                ++index;
            }

            var meanSquaredError = squaredErrorSum / testing.Nnz;
            return Math.Sqrt(meanSquaredError);
        }
Пример #7
0
        /// <summary>
        /// Subtracts the rank-one term <c>wt[U] * ht[V]</c> from every stored rating of
        /// <paramref name="testProblem"/> IN PLACE and returns the RMSE of the updated values.
        /// </summary>
        /// <param name="testProblem">Problem whose first <c>Nnz</c> ratings are overwritten with the new residuals.</param>
        /// <param name="wt">Factor values indexed by each node's <c>U</c>.</param>
        /// <param name="ht">Factor values indexed by each node's <c>V</c>.</param>
        /// <returns>Square root of the mean of the squared updated ratings.</returns>
        public static double CalculateRmseOneRow(this MfProblem testProblem, float[] wt, float[] ht)
        {
            long count = testProblem.Nnz;
            var nodes = testProblem.R;
            double squaredSum = 0;

            long i = 0;
            while (i < count)
            {
                var row = nodes[i].U;
                var column = nodes[i].V;

                // Mutates the stored rating: it becomes the residual after removing this factor.
                nodes[i].R -= wt[row] * ht[column];

                var residual = nodes[i].R;
                squaredSum += residual * residual;

                ++i;
            }

            return Math.Sqrt(squaredSum / count);
        }
Пример #8
0
        /// <summary>
        /// Replaces the contribution of an old rank-one factor with a new one in every stored
        /// rating of <paramref name="testProblem"/> IN PLACE, then returns the RMSE of the
        /// updated values. Each rating is decreased by
        /// <c>wt[U] * ht[V] - oldWt[U] * oldHt[V]</c>.
        /// </summary>
        /// <param name="testProblem">Problem whose first <c>Nnz</c> ratings are overwritten with the new residuals.</param>
        /// <param name="wt">New factor values indexed by each node's <c>U</c>.</param>
        /// <param name="ht">New factor values indexed by each node's <c>V</c>.</param>
        /// <param name="oldWt">Previous factor values indexed by each node's <c>U</c>.</param>
        /// <param name="oldHt">Previous factor values indexed by each node's <c>V</c>.</param>
        /// <returns>Square root of the mean of the squared updated ratings.</returns>
        public static double CalculateRmseOneRow(this MfProblem testProblem, float[] wt, float[] ht, float[] oldWt,
                                                 float[] oldHt)
        {
            long nnz = testProblem.Nnz;

            // Accumulate in double, like the two-array overload: a float accumulator loses
            // precision once many squared residuals have been summed.
            double rmse = 0;

            for (long idx = 0; idx < nnz; ++idx)
            {
                testProblem.R[idx].R -= wt[testProblem.R[idx].U] * ht[testProblem.R[idx].V] -
                                        oldWt[testProblem.R[idx].U] * oldHt[testProblem.R[idx].V];

                double residual = testProblem.R[idx].R;
                rmse += residual * residual;
            }

            return Math.Sqrt(rmse / nnz);
        }
Пример #9
0
        /// <summary>
        /// Computes the RMSE of the full rank-<paramref name="k"/> prediction
        /// <c>sum over t of w[t][U - 1] * h[t][V - 1]</c> against every rating in
        /// <paramref name="testProblem"/>. The problem is not modified.
        /// </summary>
        /// <param name="testProblem">Ratings to compare against; all entries of <c>R</c> are used.</param>
        /// <param name="w">Factor matrix indexed as <c>w[t][row]</c>.</param>
        /// <param name="h">Factor matrix indexed as <c>h[t][column]</c>.</param>
        /// <param name="k">Number of factors to sum over.</param>
        /// <returns>Square root of the mean squared prediction error.</returns>
        public static double CalculateRmseOneRow(this MfProblem testProblem, float[][] w, float[][] h, long k)
        {
            // Accumulate in double, like the two-array overload: a float accumulator loses
            // precision once many squared errors have been summed.
            double squaredErrorSum = 0;

            foreach (var mfNode in testProblem.R)
            {
                float predictedValue = 0;

                for (var t = 0; t < k; t++)
                {
                    // NOTE(review): ids are shifted by one here, while the other overloads of
                    // this method index with U and V directly — confirm which convention the
                    // trained factors actually use.
                    predictedValue += w[t][mfNode.U - 1] * h[t][mfNode.V - 1];
                }

                double error = predictedValue - mfNode.R;
                squaredErrorSum += error * error;
            }

            return Math.Sqrt(squaredErrorSum / testProblem.R.Length);
        }
Пример #10
0
        /// <summary>
        /// Trains a factorization model on <paramref name="trainingData"/>.
        /// </summary>
        /// <param name="trainingData">Ratings converted into the sparse matrix that is factorized.</param>
        /// <param name="testing">Passed through to the coordinate-descent routine; may be null.</param>
        /// <param name="mfTrainerOptions">Training settings; <c>ApproximationRank</c> sizes the factors.</param>
        /// <returns>
        /// A model carrying the matrix dimensions, the rank and the two factor matrices.
        /// </returns>
        public MfModel Fit(MfProblem trainingData, MfProblem testing, MfTrainerOptions mfTrainerOptions)
        {
            var ratings = SparseMatrix.CreateFromMfProblem(trainingData);

            // Row factors are initialized before column factors, as in the original order.
            var rowFactors = InitializeColumn(mfTrainerOptions.ApproximationRank, ratings.Rows);
            var columnFactors = InitializeColumn(mfTrainerOptions.ApproximationRank, ratings.Cols);

            // Only the descent itself is timed, not matrix creation or initialization.
            var timer = Stopwatch.StartNew();
            CoordinateDescentCore(ratings, rowFactors, columnFactors, testing, mfTrainerOptions);
            timer.Stop();

            _logger?.WriteLine($"Time taken is {timer.ElapsedMilliseconds} ms.");

            var trainedModel = new MfModel()
            {
                M = ratings.Rows,
                N = ratings.Cols,
                K = mfTrainerOptions.ApproximationRank,
                W = rowFactors,
                H = columnFactors
            };

            return trainedModel;
        }
Пример #11
0
        /// <summary>
        /// Cyclic Coordinate Descent for Matrix Factorization.
        /// For every outer iteration and every rank index t, copies <c>w[t]</c> and
        /// <c>h[t]</c> into working vectors, refines them with parallel calls to
        /// <c>RankOneUpdate</c> over the columns of <paramref name="r"/> and of its
        /// transpose, writes the results back into <paramref name="w"/> and
        /// <paramref name="h"/>, and calls <c>UpdateRating</c> on both matrices.
        /// </summary>
        /// <param name="r">Sparse rating matrix; it is passed to <c>UpdateRating</c> during the run.</param>
        /// <param name="w">Row factors indexed as <c>w[t][row]</c>; updated in place.</param>
        /// <param name="h">Column factors indexed as <c>h[t][column]</c>; zeroed first, then updated in place.</param>
        /// <param name="testProblem">Optional test set; used only for the per-iteration RMSE log line.</param>
        /// <param name="options">Rank, iteration counts, thread count, regularization, tolerance and flags.</param>
        private void CoordinateDescentCore(SparseMatrix r, float[][] w, float[][] h,
                                           MfProblem testProblem, MfTrainerOptions options)
        {
            long  k = options.ApproximationRank;
            long  numberOfIterations = options.NumberOfIterations;
            long  innerIterations    = options.NumberOfInnerIterations;
            var   numberOfThread     = options.NumberOfThreads;
            var   lambda             = options.LambdaRegularization;
            var   eps = options.Eps;
            // Accumulated elapsed milliseconds for the H updates, W updates and rating updates.
            float wTime = 0, hTime = 0, rTime = 0;
            var   doNmf           = options.NonNegativeMatrixFactorization;
            var   verbose         = options.Verbose;
            var   parallelOptions = new ParallelOptions()
            {
                MaxDegreeOfParallelism = numberOfThread
            };

            // Create transpose view of R
            var rt        = r.Transpose();
            var stopwatch = new Stopwatch();

            // initial value of the regularization term
            // H is a zero matrix now.
            for (long feature = 0; feature < k; ++feature)
            {
                for (long column = 0; column < r.Cols; ++column)
                {
                    h[feature][column] = 0;
                }
            }

            // Scratch vectors reused for every rank: the previous values of the current
            // factor (oldWt/oldHt) and the working copies being refined (u/v).
            var oldWt = new float[r.Rows];
            var oldHt = new float[r.Cols];
            var u     = new float[r.Rows];
            var v     = new float[r.Cols];

            for (long outerIteration = 1; outerIteration <= numberOfIterations; ++outerIteration)
            {
                // Largest decrease value seen so far in this outer iteration; the early-exit
                // threshold for the inner loop is fundecMax * eps.
                float fundecMax = 0;
                // Number of ranks whose very first inner iteration was already below the threshold.
                long  earlyStop = 0;
                for (long tt = 0; tt < k; ++tt)
                {
                    long t = tt;
                    // After five such ranks the remaining ranks are skipped for this outer iteration.
                    if (earlyStop >= 5)
                    {
                        break;
                    }

                    // NOTE(review): the interval measured between this Start and the Stop below
                    // is not added to any of the time accumulators.
                    stopwatch.Start();

                    float[] wt = w[t], ht = h[t];
                    for (int i = 0; i < r.Rows; i++)
                    {
                        oldWt[i] = u[i] = wt[i];
                    }

                    for (int i = 0; i < r.Cols; i++)
                    {
                        v[i]     = ht[i];
                        // On the first outer iteration the old H values are recorded as zero.
                        oldHt[i] = (outerIteration == 1) ? 0 : v[i];
                    }

                    // Create Rhat = R - Wt Ht^T
                    // Skipped on the first outer iteration.
                    // NOTE(review): the meaning of the boolean argument is defined by
                    // UpdateRating, which is not visible here — confirm.
                    if (outerIteration > 1)
                    {
                        UpdateRating(r, wt, ht, true, parallelOptions);
                        UpdateRating(rt, ht, wt, true, parallelOptions);
                    }

                    stopwatch.Stop();

                    // NOTE(review): innerFundecMax is updated below but never read in this method.
                    double innerFundecMax = 0;
                    long   maxIterations  = innerIterations;
                    //	if(oiter > 1) maxit *= 2;
                    for (long iteration = 1; iteration <= maxIterations; ++iteration)
                    {
                        // Update H[t]
                        stopwatch.Restart();
                        var innerFunDecCur = 0f;

                        // Collects the per-thread totals of BOTH parallel loops of this iteration.
                        var innerFun = new ThreadSafe();

                        // Each worker starts with a local value of 0f, passes it by ref to
                        // RankOneUpdate for every column it handles, and finally adds it to
                        // innerFun under the lock.
                        // NOTE(review): presumably RankOneUpdate accumulates the objective
                        // decrease into that value — confirm against its implementation.
                        Parallel.For(0, r.Cols,
                                     parallelOptions, () => 0f,
                                     (c, y, z) =>
                        {
                            v[c] = RankOneUpdate(r, c, u,
                                                 (lambda * (r.ColPtr[c + 1] - r.ColPtr[c])),
                                                 v[c], doNmf, ref z);
                            ;
                            return(z);
                        }, f =>
                        {
                            lock (innerFun)
                            {
                                innerFun.AddToTotal(f);
                            }
                        });

                        stopwatch.Stop();
                        hTime += stopwatch.ElapsedMilliseconds;
                        // Update W[t]
                        stopwatch.Restart();

                        // Same scheme on the transposed matrix: refines u using the freshly updated v.
                        Parallel.For(0, rt.Cols,
                                     parallelOptions, () => 0f,
                                     (c, y, z) =>
                        {
                            u[c] = RankOneUpdate(rt, c, v,
                                                 (lambda * (rt.ColPtr[c + 1] - rt.ColPtr[c])), u[c], doNmf, ref z);
                            return(z);
                        }, f =>
                        {
                            lock (innerFun)
                            {
                                innerFun.AddToTotal(f);
                            }
                        });

                        // Combined total of the H and W passes of this inner iteration.
                        innerFunDecCur += innerFun.Total;

                        // Stop refining this rank once the total falls below the threshold.
                        // The W-update time of this iteration is not added to wTime on this path.
                        if ((innerFunDecCur < fundecMax * eps))
                        {
                            if (iteration == 1)
                            {
                                earlyStop += 1;
                            }

                            break;
                        }

                        innerFundecMax = Math.Max(innerFundecMax, innerFunDecCur);
                        // the fundec of the first inner iter of the first rank of the first outer iteration could be too large!!
                        if (!(outerIteration == 1 && t == 0 && iteration == 1))
                        {
                            fundecMax = Math.Max(fundecMax, innerFunDecCur);
                        }
                        stopwatch.Stop();
                        wTime += stopwatch.ElapsedMilliseconds;
                    }

                    // Update R and Rt
                    // start = omp_get_wtime();
                    stopwatch.Restart();

                    // Write the refined working vectors back into the factor matrices.
                    for (int i = 0; i < r.Rows; i++)
                    {
                        wt[i] = u[i];
                    }

                    for (int i = 0; i < r.Cols; i++)
                    {
                        ht[i] = v[i];
                    }

                    UpdateRating(r, u, v, false, parallelOptions);
                    UpdateRating(rt, v, u, false, parallelOptions);

                    stopwatch.Stop();
                    rTime += stopwatch.ElapsedMilliseconds;
                }

                // With a test set and verbose output enabled, log the cumulative time and the
                // test RMSE of the current factors after each outer iteration.
                if (testProblem != null && verbose)
                {
                    _logger?.Write("iter {0, 5} time {1, 5} rmse {2, 5}", outerIteration,
                                   hTime + wTime + rTime, testProblem.CalculateRmseOneRow(w, h, k));
                }
            }
        }