예제 #1
0
        /// <summary>
        /// Entry point of the news classification trainer: prints a start banner,
        /// then runs <c>TrainModel</c>.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main(string[] args)
        {
            const string startBanner = "News Classification Trainer Started";
            BCCConsole.Write(BCCConsoleColor.Blue, false, startBanner);

            // Alternative entry, currently disabled:
            //FindTheBestModel();
            TrainModel();
        }
 /// <summary>
 /// Small demo of the console colouring helpers: writes one line through
 /// <see cref="Console"/> directly, one through <c>BCCConsole.Write</c>, and one
 /// through the <c>Yellow()</c> string extension.
 /// </summary>
 /// <param name="args">Command-line arguments; not read by this method.</param>
 static void Main(string[] args)
 {
     const string sample = "Masir";

     // Plain System.Console output in dark red.
     Console.ForegroundColor = ConsoleColor.DarkRed;
     Console.WriteLine(sample);

     // Same idea via the project helpers (their exact output format is defined elsewhere).
     BCCConsole.Write(BCCConsoleColor.DarkBlue, true, "Amir", "Asir");
     sample.Yellow();
 }
예제 #3
0
        /// <summary>
        /// Trains the news classifier on a down-sampled copy of the data set.
        /// Steps: create the sampled CSV, cross-validate a one-versus-all averaged-perceptron
        /// pipeline on it, print the averaged metrics, then fit the pipeline on the whole
        /// sample and save the resulting model as a zip file.
        /// </summary>
        private static void TrainModelWithUnbalancedData()
        {
            BCCConsole.Write(BCCConsoleColor.Gray, false, "Trainer Base Is Started ...");
            var mlContext = new MLContext(0);

            // Path.Combine instead of hard-coded '\' so the paths also resolve on non-Windows hosts.
            string trainDataPath      = Path.Combine(Environment.CurrentDirectory, "Data", "uci-news-aggregator.csv");
            string unbalancedDataFile = Path.Combine("Data", "Unbalanced.csv");
            string modelDirectory     = "Model";
            string modelPath          = Path.Combine(modelDirectory, "NewsClassificationModel.zip");

            CreateUnbalanceDataFile(trainDataPath, unbalancedDataFile);

            var trainDataView = mlContext.Data.LoadFromTextFile <ModelInput>(
                unbalancedDataFile,
                hasHeader: true,
                separatorChar: ',',
                allowQuoting: true
                );

            // Category -> key-typed Label, Title -> featurized text in Features, then min-max
            // normalization; the checkpoint caches the prepared data ahead of the trainer.
            var preProcessingPipeline = mlContext.Transforms
                                        .Conversion.MapValueToKey("Label", "Category")
                                        .Append(mlContext.Transforms.Text.FeaturizeText(inputColumnName: "Title", outputColumnName: "Features"))
                                        .Append(mlContext.Transforms.NormalizeMinMax("Features"))
                                        .AppendCacheCheckpoint(mlContext);
            var trainer = mlContext.MulticlassClassification.Trainers
                          .OneVersusAll(mlContext.BinaryClassification.Trainers.AveragedPerceptron());

            // Map the predicted key back to its original category value.
            var trainingPipeline = preProcessingPipeline
                                   .Append(trainer)
                                   .Append(
                mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel")
                );

            BCCConsole.Write(BCCConsoleColor.Cyan, false, "Cross Validation Is Starting . . .");
            var cvResult = mlContext.MulticlassClassification
                           .CrossValidate(trainDataView, trainingPipeline);

            // Average each metric over the cross-validation folds.
            double microAccuracy    = cvResult.Average(m => m.Metrics.MicroAccuracy);
            double macroAccuracy    = cvResult.Average(m => m.Metrics.MacroAccuracy);
            double logLossReduction = cvResult.Average(m => m.Metrics.LogLossReduction);

            // Print every metric with a label so the numbers are identifiable in the output.
            BCCConsole.Write(BCCConsoleColor.DarkGreen, false, "\n",
                             "Cross Validation Result Metrics",
                             "-----------------------------------");
            BCCConsole.Write(BCCConsoleColor.DarkGreen, false,
                             $"Micro Accuracy : {microAccuracy:0.###}",
                             $"Macro Accuracy : {macroAccuracy:0.###}",
                             $"Log Loss Reduction : {logLossReduction:0.###}");
            BCCConsole.Write(BCCConsoleColor.DarkGreen, false, "-----------------------------------");

            var finalModel = trainingPipeline.Fit(trainDataView);

            // CreateDirectory does nothing when the directory already exists, so no Exists check is needed.
            Directory.CreateDirectory(modelDirectory);

            BCCConsole.Write(BCCConsoleColor.Yellow, false, "Saving Model . . .");
            mlContext.Model.Save(finalModel, trainDataView.Schema, modelPath);
            BCCConsole.Write(BCCConsoleColor.Green, false, "Saved !");
        }
예제 #4
0
        /// <summary>
        /// Tries a grid of matrix-factorization settings (iteration count 5..95 in steps of 5,
        /// approximation rank 50..200 in steps of 50), cross-validates each combination and
        /// prints all results ordered by averaged R-squared, best first.
        /// </summary>
        /// <param name="mlContext">Context used to create the trainer and run cross-validation.</param>
        /// <param name="dataPreProcessingPipeLine">Estimator chain the trainer is appended to.</param>
        /// <param name="trainDataView">Data that every combination is cross-validated on.</param>
        private static void HyperParameterExploration(MLContext mlContext
                                                      , IEstimator <ITransformer> dataPreProcessingPipeLine
                                                      , IDataView trainDataView)
        {
            // One entry per (iterations, rank) combination, in the order they were evaluated.
            var runs = new List <(double Rmse, double RSquared, int Iterations, int Rank)>();

            for (int iterationCount = 5; iterationCount < 100; iterationCount += 5)
            {
                for (int rank = 50; rank < 250; rank += 50)
                {
                    var settings = new MatrixFactorizationTrainer.Options();
                    settings.MatrixColumnIndexColumnName = "UserIdEncoded";
                    settings.MatrixRowIndexColumnName    = "RestaurantNameEncoded";
                    settings.LabelColumnName             = "TotalRating";
                    settings.NumberOfIterations          = iterationCount;
                    settings.ApproximationRank           = rank;
                    settings.Quiet                       = true;

                    var factorization = mlContext.Recommendation().Trainers.MatrixFactorization(settings);
                    var fullPipeline  = dataPreProcessingPipeLine.Append(factorization);
                    var foldResults   = mlContext.Recommendation()
                                        .CrossValidate(trainDataView, fullPipeline, labelColumnName: "TotalRating");

                    // Average both metrics across the folds before recording the run.
                    double meanRmse     = foldResults.Average(fold => fold.Metrics.RootMeanSquaredError);
                    double meanRSquared = foldResults.Average(fold => fold.Metrics.RSquared);
                    runs.Add((meanRmse, meanRSquared, iterationCount, rank));
                }
            }

            BCCConsole.Write(BCCConsoleColor.DarkGreen, false, "\n", "--- Hyper Parameter Exploration Result Metrics ---");

            var ranked = runs.OrderByDescending(run => run.RSquared);
            foreach (var run in ranked)
            {
                BCCConsole.Write(BCCConsoleColor.DarkGreen, false, "\n",
                                 $"RSQ => RSquared : {run.RSquared:#.000}",
                                 $"RMSE => Root Error : {run.Rmse:#.000}",
                                 $"I => Iterations : {run.Iterations}",
                                 $"AR => ApproximationRank : {run.Rank}"
                                 );
            }

            BCCConsole.Write(BCCConsoleColor.DarkGreen, false, "\n", "---------------------------------------");
        }
예제 #5
0
        /// <summary>
        /// Runs an AutoML multiclass experiment (300 second budget, optimizing micro accuracy)
        /// over the news data set and prints the micro accuracy of the best run.
        /// </summary>
        private static void FindTheBestModel()
        {
            BCCConsole.Write(BCCConsoleColor.DarkGreen, false, "\nFinding the Best Model Using AutoML");

            var    mlContext      = new MLContext(0);
            string trainDataPath  = @"Data\uci-news-aggregator.csv";
            string trainCachePath = @"Cache\";
            // NOTE(review): bestModelPath is never read below, so the best run is not saved
            // anywhere in this method - confirm whether persisting it was intended.
            string bestModelPath  = @"Model\BestModelRun.zip";

            var trainDataView = mlContext.Data.LoadFromTextFile <ModelInput>(
                trainDataPath,
                hasHeader: true,
                separatorChar: ',',
                allowQuoting: true
                );

            // Convert the Category column to a key type in place before handing the data to AutoML.
            var categoryToKey   = mlContext.Transforms.Conversion.MapValueToKey("Category", "Category");
            var fittedMapping   = categoryToKey.Fit(trainDataView);
            var mappedInputData = fittedMapping.Transform(trainDataView);

            var experimentSetting = new MulticlassExperimentSettings();
            experimentSetting.MaxExperimentTimeInSeconds = 300;
            experimentSetting.CacheBeforeTrainer         = CacheBeforeTrainer.On;
            experimentSetting.OptimizingMetric           = MulticlassClassificationMetric.MicroAccuracy;
            experimentSetting.CacheDirectory             = new DirectoryInfo(trainCachePath);

            var experimentResult = mlContext.Auto()
                                   .CreateMulticlassClassificationExperiment(experimentSetting)
                                   .Execute(
                trainData: mappedInputData,
                labelColumnName: "Category",
                progressHandler: new MulticlassExperimentProgressHandler()
                );

            BCCConsole.Write(BCCConsoleColor.Yellow, false, "Metrics From Best Run ... ");
            var bestRunMetrics = experimentResult.BestRun.ValidationMetrics;

            BCCConsole.Write(BCCConsoleColor.DarkGreen, false, $"Metric Micro Accuracy : {bestRunMetrics.MicroAccuracy:0.##}");
            BCCConsole.Write(BCCConsoleColor.Green, false, "Success !");
        }
예제 #6
0
        /// <summary>
        /// Writes a down-sampled copy of a CSV file: the first line is always copied, and every
        /// later line that contains one of the category markers (",b,", ",e,", ",t,", ",m,") is
        /// kept with a probability of 10%. Lines without any marker are dropped. The number of
        /// kept lines per category is printed at the end.
        /// </summary>
        /// <param name="inputFilePath">Path of the CSV file to sample from; its first line is treated as the header.</param>
        /// <param name="outputFilePath">Path the sampled file is written to (overwritten if present).</param>
        /// <exception cref="InvalidOperationException">The input file contains no lines.</exception>
        public static void CreateUnbalanceDataFile(string inputFilePath, string outputFilePath)
        {
            const double sampleRate = .1;

            // Checked in this order; a row matching several markers is counted for the first one.
            // NOTE(review): the match is a plain substring test on the raw line, so a marker that
            // happens to appear inside another field would also match - confirm this is acceptable.
            string[] categoryMarkers = { ",b,", ",e,", ",t,", ",m," };
            var      sampleCounts    = new int[categoryMarkers.Length];

            BCCConsole.Write(BCCConsoleColor.Blue, false, "\n", "Unbalance 10% Filter Is Starting . . .");
            var inputFileRow = File.ReadAllLines(inputFilePath);

            if (inputFileRow.Length == 0)
            {
                // Same exception type the previous First() call raised, but with a usable message.
                throw new InvalidOperationException(
                          $"Input file '{inputFilePath}' is empty; a header row is required.");
            }

            var outputFileRow = new List <string> { inputFileRow[0] };

            // Fixed seed: the same input always yields the same sample.
            var randomGenerator = new Random(0);

            foreach (var row in inputFileRow.Skip(1))
            {
                int category = Array.FindIndex(categoryMarkers, marker => row.Contains(marker));
                if (category < 0)
                {
                    // No marker: skip without drawing, so the random sequence only advances on category rows.
                    continue;
                }

                if (randomGenerator.NextDouble() <= sampleRate)
                {
                    outputFileRow.Add(row);
                    sampleCounts[category]++;
                }
            }

            // Make sure the target directory exists; CreateDirectory is a no-op when it already does.
            var outputDirectory = Path.GetDirectoryName(outputFilePath);
            if (!string.IsNullOrEmpty(outputDirectory))
            {
                Directory.CreateDirectory(outputDirectory);
            }

            File.WriteAllLines(outputFilePath, outputFileRow);

            BCCConsole.Write(BCCConsoleColor.DarkGreen, false,
                             "\n",
                             "---------------------------",
                             "Unbalance Training Test Result ",
                             $"--- Business Rank {sampleCounts[0]}",
                             $"--- Entertainment Rank {sampleCounts[1]}",
                             $"--- Technology Rank {sampleCounts[2]}",
                             $"--- Medical Rank {sampleCounts[3]}",
                             "---------------------------"
                             );
        }
예제 #7
0
        /// <summary>
        /// Entry point of the restaurant recommender: prepares and loads the training file,
        /// trains a matrix-factorization model on it, then asks for a restaurant name on the
        /// console and prints the predicted score for that restaurant.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main(string[] args)
        {
            BCCConsole.Write(BCCConsoleColor.DarkBlue, false, "Restaurant Recommender Is Started . . .");

            MLContext mlContext        = new MLContext(0);
            var       trainingDataFile = Environment.CurrentDirectory + @"\Data\TrainingFile.tsv";

            // PreprocessData is defined elsewhere; presumably it produces or refreshes the
            // training file that is loaded on the next line - verify against DataPreparer.
            DataPreparer.PreprocessData(trainingDataFile);
            IDataView trainingDataView = mlContext.Data
                                         .LoadFromTextFile <ModelInput>(trainingDataFile, hasHeader: true);

            // Encode the user id and restaurant name as key columns for the trainer.
            var dataPreProcessingPipeLine = mlContext.Transforms.Conversion
                                            .MapValueToKey("UserIdEncoded", nameof(ModelInput.UserId))
                                            .Append(mlContext.Transforms.Conversion
                                                    .MapValueToKey("RestaurantNameEncoded", nameof(ModelInput.RestaurantName)));

            var options = new MatrixFactorizationTrainer.Options
            {
                MatrixColumnIndexColumnName = "UserIdEncoded",
                MatrixRowIndexColumnName    = "RestaurantNameEncoded",
                LabelColumnName             = "TotalRating",
                NumberOfIterations          = 10,
                ApproximationRank           = 200,
                Quiet = true
            };

            var trainer = mlContext.Recommendation().Trainers.MatrixFactorization(options);

            var trainerPipeLine = dataPreProcessingPipeLine.Append(trainer);

            #region Not Using CV

            BCCConsole.Write(BCCConsoleColor.DarkBlue, false, "\n", "Training Model");
            var model = trainerPipeLine.Fit(trainingDataView);

            ////Test
            //var testUserId = "U1134";
            var predictionEngine = mlContext.Model
                                   .CreatePredictionEngine <ModelInput, ModelOutput>(model);
            //var alreadyRatedRestaurant = mlContext.Data
            //	.CreateEnumerable<ModelInput>(trainingDataView, false)
            //	.Where(r => r.UserId == testUserId)
            //	.Select(r => r.RestaurantName)
            //	.Distinct();
            //var allRestaurantNames = trainingDataView
            //	.GetColumn<string>("RestaurantName")
            //	.Distinct().Where(r => !alreadyRatedRestaurant.Contains(r));
            //var scoredRestaurant = allRestaurantNames
            //	.Select(rn =>
            //	{
            //		var prediction = predictionEngine.Predict(
            //			new ModelInput()
            //			{
            //				UserId = testUserId,
            //				RestaurantName = rn
            //			});
            //		return (RestaurantName: rn, PredictedScore: prediction.Score);
            //	});

            //var top10Restaurant = scoredRestaurant
            //	.OrderByDescending(r => r.PredictedScore)
            //	.Take(10);
            //BCCConsole.Write(BCCConsoleColor.DarkGreen,false,
            //	"\n",
            //	$"Top 10 Restaurant Name & Rate For User {testUserId}",
            //	"----------------------------------------------------");
            //foreach (var top in top10Restaurant)
            //{
            //	BCCConsole.Write(BCCConsoleColor.DarkGreen,false,$"Prediction Score [{top.PredictedScore:#.0}] | Restaurant Name [{top.RestaurantName}] ");
            //}
            //BCCConsole.Write(BCCConsoleColor.DarkGreen,false, "----------------------------------------------------");

            #endregion

            #region Using CV

            //var cvMetrics = mlContext.Recommendation()
            //	.CrossValidate(trainingDataView, trainerPipeLine, labelColumnName: "TotalRating");

            //var averageRMSE = cvMetrics.Average(cv => cv.Metrics.RootMeanSquaredError);
            //var averageRSquared = cvMetrics.Average(cv => cv.Metrics.RSquared);
            //BCCConsole.Write(BCCConsoleColor.DarkGreen, false,
            //	"\n",
            //	"Training Result Before Cross Validation (Metrics) ",
            //	"--------------------------------------------------",
            //	$"RMSE => Root Error : {averageRMSE:#.000}",
            //	$"RSQ => RSquared : {averageRSquared:#.000}",
            //	"--------------------------------------------------");

            #endregion

            Console.WriteLine("Enter Restaurant Name");
            string rn = Console.ReadLine();

            // ReadLine returns null at end of input; blank or whitespace-only names are rejected too.
            if (string.IsNullOrWhiteSpace(rn))
            {
                "Error".Red();
                Environment.Exit(-1);
            }

            var prediction = predictionEngine.Predict(new ModelInput()
            {
                UserId         = "CLONED",
                RestaurantName = rn
            });

            // Report the restaurant that was actually scored rather than a fixed name.
            BCCConsole.Write(BCCConsoleColor.Green, false, "\n", $"Prediction Result Score : {prediction.Score:#.0} For {rn}");

            //HyperParameterExploration(mlContext, dataPreProcessingPipeLine, trainingDataView);
        }