/// <summary>
/// Entry point of the news-classification trainer: writes a start banner and
/// then calls <c>TrainModel()</c>.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    BCCConsole.Write(BCCConsoleColor.Blue, false, "News Classification Trainer Started");

    // The AutoML search is switched off; only the fixed training routine runs.
    //FindTheBestModel();
    TrainModel();
}
/// <summary>
/// Console-colour smoke test: writes sample text through the raw
/// <see cref="Console"/> API, through <c>BCCConsole.Write</c> and through the
/// string colour extension.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    const string sampleText = "Masir";

    // Plain System.Console output in dark red.
    Console.ForegroundColor = ConsoleColor.DarkRed;
    Console.WriteLine(sampleText);

    // Same idea through the project's console helpers.
    BCCConsole.Write(BCCConsoleColor.DarkBlue, true, "Amir", "Asir");
    sampleText.Yellow();
}
/// <summary>
/// Builds a sampled copy of the news data set, cross-validates a one-versus-all
/// averaged-perceptron classifier on it, then fits the pipeline on the whole
/// sampled set and saves the model to <c>Model/NewsClassificationModel.zip</c>.
/// </summary>
private static void TrainModelWithUnbalancedData()
{
    BCCConsole.Write(BCCConsoleColor.Gray, false, "Trainer Base Is Started ...");

    var mlContext = new MLContext(0);

    // Path.Combine keeps the paths valid on non-Windows hosts as well.
    string trainDataPath = Path.Combine(Environment.CurrentDirectory, "Data", "uci-news-aggregator.csv");
    string unbalancedDataFile = Path.Combine("Data", "Unbalanced.csv");
    CreateUnbalanceDataFile(trainDataPath, unbalancedDataFile);

    var trainDataView = mlContext.Data.LoadFromTextFile<ModelInput>(
        unbalancedDataFile,
        hasHeader: true,
        separatorChar: ',',
        allowQuoting: true);

    // Category -> key label, Title -> normalised text features, cached before training.
    var preProcessingPipeline = mlContext.Transforms.Conversion
        .MapValueToKey("Label", "Category")
        .Append(mlContext.Transforms.Text.FeaturizeText(inputColumnName: "Title", outputColumnName: "Features"))
        .Append(mlContext.Transforms.NormalizeMinMax("Features"))
        .AppendCacheCheckpoint(mlContext);

    var trainer = mlContext.MulticlassClassification.Trainers
        .OneVersusAll(mlContext.BinaryClassification.Trainers.AveragedPerceptron());

    // Map the predicted key back to the original category value.
    var trainingPipeline = preProcessingPipeline
        .Append(trainer)
        .Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"));

    BCCConsole.Write(BCCConsoleColor.Cyan, false, "Cross Validation Is Starting . . .");
    var cvResult = mlContext.MulticlassClassification
        .CrossValidate(trainDataView, trainingPipeline);

    BCCConsole.Write(BCCConsoleColor.DarkGreen, false,
        "\n",
        "Cross Validation Result Metrics",
        "-----------------------------------");

    // Fold averages of micro accuracy, macro accuracy and log-loss reduction.
    // NOTE(review): DarkGreen() is a project extension not visible here; it
    // presumably writes the text to the console - confirm. Its return value
    // was never used, so the calls are kept as plain statements.
    cvResult.Average(m => m.Metrics.MicroAccuracy).ToString("0.###").DarkGreen();
    cvResult.Average(m => m.Metrics.MacroAccuracy).ToString("0.###").DarkGreen();
    cvResult.Average(m => m.Metrics.LogLossReduction).ToString("0.###").DarkGreen();

    BCCConsole.Write(BCCConsoleColor.DarkGreen, false, "-----------------------------------");

    // Final model is fitted on the full sampled data set, not on a single fold.
    var finalModel = trainingPipeline.Fit(trainDataView);

    const string modelDirectory = "Model";
    string modelPath = Path.Combine(modelDirectory, "NewsClassificationModel.zip");

    // CreateDirectory does nothing when the directory already exists.
    Directory.CreateDirectory(modelDirectory);

    BCCConsole.Write(BCCConsoleColor.Yellow, false, "Saving Model . . .");
    mlContext.Model.Save(finalModel, trainDataView.Schema, modelPath);
    BCCConsole.Write(BCCConsoleColor.Green, false, "Saved !");
}
/// <summary>
/// Grid-searches the matrix-factorization trainer over iteration count and
/// approximation rank, cross-validating every combination, and prints the
/// results ordered by descending average R-squared.
/// </summary>
/// <param name="mlContext">ML.NET context used to create trainers and run cross-validation.</param>
/// <param name="dataPreProcessingPipeLine">
/// Pre-processing estimator the trainer is appended to; the trainer reads the
/// "UserIdEncoded" and "RestaurantNameEncoded" columns, so the pipeline is
/// expected to produce them.
/// </param>
/// <param name="trainDataView">Data to cross-validate on; "TotalRating" is used as the label column.</param>
private static void HyperParameterExploration(
    MLContext mlContext,
    IEstimator<ITransformer> dataPreProcessingPipeLine,
    IDataView trainDataView)
{
    // Search grid: iterations 5, 10, ..., 95 and approximation rank 50, 100, 150, 200.
    const int iterationsStart = 5, iterationsLimit = 100, iterationsStep = 5;
    const int rankStart = 50, rankLimit = 250, rankStep = 50;

    var result = new List<(double RMSE, double RSQ, int iterations, int approximationRank)>();

    for (int iterations = iterationsStart; iterations < iterationsLimit; iterations += iterationsStep)
    {
        for (int approximationRank = rankStart; approximationRank < rankLimit; approximationRank += rankStep)
        {
            var option = new MatrixFactorizationTrainer.Options
            {
                MatrixColumnIndexColumnName = "UserIdEncoded",
                MatrixRowIndexColumnName = "RestaurantNameEncoded",
                LabelColumnName = "TotalRating",
                NumberOfIterations = iterations,
                ApproximationRank = approximationRank,
                Quiet = true
            };

            var trainer = mlContext.Recommendation().Trainers.MatrixFactorization(option);
            var completePipeLine = dataPreProcessingPipeLine.Append(trainer);

            var cvMetrics = mlContext.Recommendation()
                .CrossValidate(trainDataView, completePipeLine, labelColumnName: "TotalRating");

            // Keep the fold averages together with the settings that produced them.
            result.Add((
                cvMetrics.Average(cv => cv.Metrics.RootMeanSquaredError),
                cvMetrics.Average(cv => cv.Metrics.RSquared),
                iterations,
                approximationRank));
        }
    }

    BCCConsole.Write(BCCConsoleColor.DarkGreen, false,
        "\n",
        "--- Hyper Parameter Exploration Result Metrics ---");

    // Best fit (highest R-squared) first. "0.000" keeps the leading zero, so
    // 0.512 prints as "0.512" rather than ".512".
    foreach (var res in result.OrderByDescending(r => r.RSQ))
    {
        BCCConsole.Write(BCCConsoleColor.DarkGreen, false,
            "\n",
            $"RSQ => RSquared : {res.RSQ:0.000}",
            $"RMSE => Root Error : {res.RMSE:0.000}",
            $"I => Iterations : {res.iterations}",
            $"AR => ApproximationRank : {res.approximationRank}");
    }

    BCCConsole.Write(BCCConsoleColor.DarkGreen, false,
        "\n",
        "---------------------------------------");
}
/// <summary>
/// Runs an AutoML multiclass-classification experiment (300 seconds, optimising
/// micro accuracy) on the news data set and prints the micro accuracy of the
/// best run.
/// </summary>
private static void FindTheBestModel()
{
    BCCConsole.Write(BCCConsoleColor.DarkGreen, false, "\nFinding the Best Model Using AutoML");

    var mlContext = new MLContext(0);

    // Portable paths: no hard-coded '\' separators.
    string trainDataPath = Path.Combine("Data", "uci-news-aggregator.csv");
    string trainCachePath = "Cache";

    // NOTE(review): the original also declared an unused path
    // "Model\BestModelRun.zip"; the best run's model is never saved here.
    // Confirm whether persisting it was intended.

    var trainDataView = mlContext.Data.LoadFromTextFile<ModelInput>(
        trainDataPath,
        hasHeader: true,
        separatorChar: ',',
        allowQuoting: true);

    // The experiment uses "Category" as its label, so convert it to a key in place.
    var preProcessingPipeline = mlContext.Transforms.Conversion
        .MapValueToKey("Category", "Category");
    var mappedInputData = preProcessingPipeline
        .Fit(trainDataView)
        .Transform(trainDataView);

    var experimentSetting = new MulticlassExperimentSettings()
    {
        MaxExperimentTimeInSeconds = 300,
        CacheBeforeTrainer = CacheBeforeTrainer.On,
        OptimizingMetric = MulticlassClassificationMetric.MicroAccuracy,
        CacheDirectory = new DirectoryInfo(trainCachePath)
    };

    var experiment = mlContext.Auto().CreateMulticlassClassificationExperiment(experimentSetting);
    var experimentResult = experiment.Execute(
        trainData: mappedInputData,
        labelColumnName: "Category",
        progressHandler: new MulticlassExperimentProgressHandler());

    BCCConsole.Write(BCCConsoleColor.Yellow, false, "Metrics From Best Run ... ");
    var metrics = experimentResult.BestRun.ValidationMetrics;
    BCCConsole.Write(BCCConsoleColor.DarkGreen, false, $"Metric Micro Accuracy : {metrics.MicroAccuracy:0.##}");
    BCCConsole.Write(BCCConsoleColor.Green, false, "Success !");
}
/// <summary>
/// Copies the header row of a CSV file plus a pseudo-random sample of about 10%
/// of the rows that contain one of the category markers <c>,b,</c>, <c>,e,</c>,
/// <c>,t,</c> or <c>,m,</c> into a new file, then prints how many rows were
/// kept per category.
/// </summary>
/// <param name="inputFilePath">CSV file to sample; its first line is treated as the header.</param>
/// <param name="outputFilePath">File that receives the header and the sampled rows (overwritten).</param>
/// <exception cref="InvalidOperationException">The input file contains no lines at all.</exception>
public static void CreateUnbalanceDataFile(string inputFilePath, string outputFilePath)
{
    const double sampleRate = .1;

    // Order matters: a row is counted under the first marker it contains
    // (business, entertainment, technology, medical).
    string[] categoryMarkers = { ",b,", ",e,", ",t,", ",m," };
    var sampleCounts = new int[categoryMarkers.Length];

    BCCConsole.Write(BCCConsoleColor.Blue, false,
        "\n",
        "Unbalance 10% Filter Is Starting . . .");

    var outputFileRow = new List<string>();

    // Fixed seed so repeated runs select the same rows.
    var randomGenerator = new Random(0);
    bool headerSeen = false;

    // Stream the file instead of loading every line into memory first.
    foreach (var row in File.ReadLines(inputFilePath))
    {
        if (!headerSeen)
        {
            outputFileRow.Add(row);
            headerSeen = true;
            continue;
        }

        int category = Array.FindIndex(categoryMarkers, marker => row.Contains(marker));
        if (category < 0)
        {
            // Rows without a known marker are dropped and consume no random draw.
            continue;
        }

        if (randomGenerator.NextDouble() <= sampleRate)
        {
            outputFileRow.Add(row);
            sampleCounts[category]++;
        }
    }

    if (!headerSeen)
    {
        throw new InvalidOperationException(
            $"Input file '{inputFilePath}' is empty; a header row is required.");
    }

    File.WriteAllLines(outputFilePath, outputFileRow);

    BCCConsole.Write(BCCConsoleColor.DarkGreen, false,
        "\n",
        "---------------------------",
        "Unbalance Training Test Result ",
        $"--- Business Rank {sampleCounts[0]}",
        $"--- Entertainment Rank {sampleCounts[1]}",
        $"--- Technology Rank {sampleCounts[2]}",
        $"--- Medical Rank {sampleCounts[3]}",
        "---------------------------");
}
/// <summary>
/// Entry point of the restaurant recommender: pre-processes and loads the
/// training file, trains a matrix-factorization model, then asks for a
/// restaurant name and prints the predicted score for the user id "CLONED".
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    BCCConsole.Write(BCCConsoleColor.DarkBlue, false, "Restaurant Recommender Is Started . . .");

    MLContext mlContext = new MLContext(0);

    // Fully qualified so the block does not depend on a 'using System.IO;' directive.
    var trainingDataFile = System.IO.Path.Combine(Environment.CurrentDirectory, "Data", "TrainingFile.tsv");
    DataPreparer.PreprocessData(trainingDataFile);

    IDataView trainingDataView = mlContext.Data
        .LoadFromTextFile<ModelInput>(trainingDataFile, hasHeader: true);

    // Matrix factorization needs key-typed row/column indices.
    var dataPreProcessingPipeLine = mlContext.Transforms.Conversion
        .MapValueToKey("UserIdEncoded", nameof(ModelInput.UserId))
        .Append(mlContext.Transforms.Conversion
            .MapValueToKey("RestaurantNameEncoded", nameof(ModelInput.RestaurantName)));

    var options = new MatrixFactorizationTrainer.Options
    {
        MatrixColumnIndexColumnName = "UserIdEncoded",
        MatrixRowIndexColumnName = "RestaurantNameEncoded",
        LabelColumnName = "TotalRating",
        NumberOfIterations = 10,
        ApproximationRank = 200,
        Quiet = true
    };

    var trainer = mlContext.Recommendation().Trainers.MatrixFactorization(options);
    var trainerPipeLine = dataPreProcessingPipeLine.Append(trainer);

    #region Not Using CV

    BCCConsole.Write(BCCConsoleColor.DarkBlue, false,
        "\n",
        "Training Model");
    var model = trainerPipeLine.Fit(trainingDataView);

    ////Test
    //var testUserId = "U1134";

    var predictionEngine = mlContext.Model
        .CreatePredictionEngine<ModelInput, ModelOutput>(model);

    //var alreadyRatedRestaurant = mlContext.Data
    //    .CreateEnumerable<ModelInput>(trainingDataView, false)
    //    .Where(r => r.UserId == testUserId)
    //    .Select(r => r.RestaurantName)
    //    .Distinct();

    //var allRestaurantNames = trainingDataView
    //    .GetColumn<string>("RestaurantName")
    //    .Distinct().Where(r => !alreadyRatedRestaurant.Contains(r));

    //var scoredRestaurant = allRestaurantNames
    //    .Select(rn =>
    //    {
    //        var prediction = predictionEngine.Predict(
    //            new ModelInput()
    //            {
    //                UserId = testUserId,
    //                RestaurantName = rn
    //            });
    //        return (RestaurantName: rn, PredictedScore: prediction.Score);
    //    });

    //var top10Restaurant = scoredRestaurant
    //    .OrderByDescending(r => r.PredictedScore)
    //    .Take(10);

    //BCCConsole.Write(BCCConsoleColor.DarkGreen,false,
    //    "\n",
    //    $"Top 10 Restaurant Name & Rate For User {testUserId}",
    //    "----------------------------------------------------");
    //foreach (var top in top10Restaurant)
    //{
    //    BCCConsole.Write(BCCConsoleColor.DarkGreen,false,$"Prediction Score [{top.PredictedScore:#.0}] | Restaurant Name [{top.RestaurantName}] ");
    //}
    //BCCConsole.Write(BCCConsoleColor.DarkGreen,false, "----------------------------------------------------");

    #endregion

    #region Using CV

    //var cvMetrics = mlContext.Recommendation()
    //    .CrossValidate(trainingDataView, trainerPipeLine, labelColumnName: "TotalRating");
    //var averageRMSE = cvMetrics.Average(cv => cv.Metrics.RootMeanSquaredError);
    //var averageRSquared = cvMetrics.Average(cv => cv.Metrics.RSquared);

    //BCCConsole.Write(BCCConsoleColor.DarkGreen, false,
    //    "\n",
    //    "Training Result Before Cross Validation (Metrics) ",
    //    "--------------------------------------------------",
    //    $"RMSE => Root Error : {averageRMSE:#.000}",
    //    $"RSQ => RSquared : {averageRSquared:#.000}",
    //    "--------------------------------------------------");

    #endregion

    Console.WriteLine("Enter Restaurant Name");
    string restaurantName = Console.ReadLine();

    // ReadLine returns null at end of input; blank names cannot be scored either.
    if (string.IsNullOrWhiteSpace(restaurantName))
    {
        "Error".Red();
        Environment.Exit(-1);
    }

    var prediction = predictionEngine.Predict(new ModelInput()
    {
        UserId = "CLONED",
        RestaurantName = restaurantName
    });

    // Report the restaurant that was actually scored, not a fixed sample name.
    // "0.0" keeps the leading zero for scores below 1.
    BCCConsole.Write(BCCConsoleColor.Green, false,
        "\n",
        $"Prediction Result Score : {prediction.Score:0.0} For {restaurantName}");

    //HyperParameterExploration(mlContext, dataPreProcessingPipeLine, trainingDataView);
}