/// <summary>
/// Console entry point. Reads the ratings file, splits it into chunks, then for each
/// chunk index trains a fresh <c>FactorizationMachine</c> with that index passed to
/// <c>Learn</c> as the chunk to skip, and finally prints the mean and standard
/// deviation of the RMSE values returned by those runs.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Dimensions and chunking parameter handed to NetflixDataHelper.GetChunks.
    const int usersCount = 470758;
    const int moviesCount = 4500;
    const int batchCount = 10000;

    // Arguments forwarded to the FactorizationMachine constructor.
    const double targetMin = 1;
    const double targetMax = 5;
    const int VSize = 5;

    // Arguments forwarded to FactorizationMachine.Learn.
    const double learningRate = 0.02;
    const int maxIterations = 4;
    const double maxError = 1.08;

    List<Rate> ratings = NetflixDataHelper.ReadDataSet(@"combined_data_1.txt");
    Console.WriteLine("Dataset read: " + ratings.Count + " records");

    List<Chunk> folds = NetflixDataHelper.GetChunks(ratings, usersCount, moviesCount, batchCount);
    Console.WriteLine("Feature matrix created");

    // One RMSE slot per chunk index.
    Vector<double> foldErrors = Vector<double>.Build.Dense(folds.Count);

    int foldIndex = 0;
    while (foldIndex < folds.Count)
    {
        // A new model is built for every run so the runs do not share learned state.
        // NOTE(review): the meaning of the trailing 0.1 constructor argument is not
        // visible from here - confirm against FactorizationMachine.
        var machine = new FactorizationMachine(folds[0].X.ColumnCount, VSize, targetMin, targetMax, 0.1);
        foldErrors[foldIndex] = machine.Learn(folds, foldIndex, learningRate, maxIterations, maxError);
        foldIndex++;
    }

    var meanError = foldErrors.Mean();
    var errorSpread = foldErrors.StandardDeviation();
    Console.WriteLine("RMSE mean: " + meanError + "+-" + errorSpread);

    // Wait for a key press before the process exits.
    Console.ReadKey();
}
/// <summary>
/// Trains this model with per-row gradient-descent updates on every chunk except the
/// one at <paramref name="skipIndex"/>, and after each pass evaluates the RMSE of
/// <c>Predict</c> on that held-out chunk.
/// </summary>
/// <param name="trainData">All chunks; the chunk at <paramref name="skipIndex"/> is used only for evaluation. The list itself is not reordered by this method.</param>
/// <param name="skipIndex">Index in <paramref name="trainData"/> of the held-out chunk.</param>
/// <param name="learningRate">Step size forwarded to <c>GradDescent</c>.</param>
/// <param name="itCount">Maximum number of passes over the training chunks.</param>
/// <param name="err">Early-stop threshold: training ends as soon as the held-out RMSE drops below this value.</param>
/// <returns>
/// The RMSE on the held-out chunk after the last completed pass, or the initial
/// value 5 when <paramref name="itCount"/> allows no pass at all.
/// </returns>
public double Learn(List<Chunk> trainData, int skipIndex, double learningRate, int itCount, double err = 1.08)
{
    double rmse = 5;
    if (itCount <= 0)
    {
        return rmse;
    }

    // Pin the held-out chunk BEFORE any shuffling. Previously the whole list was
    // shuffled first, so trainData[skipIndex] pointed at an arbitrary chunk afterwards:
    // the intended test chunk could leak into training and the evaluated chunk changed
    // from one iteration to the next.
    Chunk heldOut = trainData[skipIndex];

    // Private copy of the training chunks so that shuffling never disturbs the
    // caller's list (the caller indexes it by fold number).
    List<Chunk> trainingChunks = new List<Chunk>(trainData.Count - 1);
    for (int i = 0; i < trainData.Count; i++)
    {
        if (i != skipIndex)
        {
            trainingChunks.Add(trainData[i]);
        }
    }

    for (int it = 0; it < itCount; it++)
    {
        int startTime = Environment.TickCount;

        // Visit the training chunks in a fresh random order on every pass.
        NetflixDataHelper.Shuffle(ref trainingChunks);

        foreach (Chunk chunk in trainingChunks)
        {
            int rowCount = chunk.X.RowCount;

            // Fisher-Yates shuffle of the row indices so every row is visited exactly
            // once per pass. The previous loop drew rnd.Next(n + 1) without swapping,
            // which sampled rows with replacement, favoured low indices and performed
            // only rowCount - 1 updates.
            int[] rowOrder = new int[rowCount];
            for (int r = 0; r < rowCount; r++)
            {
                rowOrder[r] = r;
            }

            for (int n = rowCount - 1; n > 0; n--)
            {
                int m = rnd.Next(n + 1);
                int swapped = rowOrder[n];
                rowOrder[n] = rowOrder[m];
                rowOrder[m] = swapped;
            }

            foreach (int row in rowOrder)
            {
                this.GradDescent(chunk.X.Row(row), chunk.Y[row], learningRate);
            }
        }

        // Root-mean-square error of the predictions on the held-out chunk.
        Vector<double> testEval = this.Predict(heldOut.X);
        Vector<double> e = testEval - heldOut.Y;
        rmse = Math.Sqrt(e.PointwisePower(2).Sum() / e.Count);

        var time = (Environment.TickCount - startTime) / 1000.0;
        Console.WriteLine($"Test batch - {skipIndex}, iteration - {it}, rmse: {rmse}, time: {time}");

        if (rmse < err)
        {
            break;
        }
    }

    return rmse;
}