Esempio n. 1
0
        /// <summary>
        /// Loads the taxi-fare train and test sets from SQL Server, prints the per-fold R-squared of a
        /// 5-fold FastTreeTweedie cross-validation on the training set, then trains (or reloads) one
        /// model per regression algorithm, evaluates each on the test set, and prints the name of the
        /// algorithm with the lowest mean squared error.
        /// </summary>
        /// <param name="train">
        /// When <c>true</c> (the default) every algorithm is retrained and its model is saved to
        /// <c>Regression_{name}.zip</c>. When <c>false</c>, a saved model file is reused if it exists;
        /// algorithms without a saved model file are still trained and saved.
        /// </param>
        /// <returns>A task that completes once every algorithm has been evaluated.</returns>
        /// <exception cref="InvalidOperationException">
        /// A trainer returned without handing a model to the callback.
        /// </exception>
        static async Task RegressionExample(bool train = true)
        {
            var bestAlg = string.Empty;
            double mse = double.MaxValue;
            var mlContext = new MLContext();

            // NOTE(review): credentials are hard-coded here; consider moving them to configuration.
            var sqlConnection = "Server = localhost;database = Local;user = sa;password = sa";
            IEnumerable <TaxiFare> testdata;
            IEnumerable <TaxiFare> traindata;

            using (var connection = new SQLServer(sqlConnection))
            {
                // Materialize both result sets while the connection is open: they are enumerated
                // several times below, after the connection has been disposed.
                testdata  = (await connection.QueryAsync <TaxiFareTest>(top : 10)).ToList();
                traindata = (await connection.QueryAsync <TaxiFareTrain>()).ToList();
            }

            // Cross-validation uses its own context constructed with a fixed seed (1).
            var context         = new MLContext(1);
            var dataframe       = context.Data.LoadFromEnumerable(traindata);
            var featurePipeline = context.Transforms
                                  .Concatenate("Features", new[] { "rate_code", "passenger_count", "trip_time_in_secs", "trip_distance" })
                                  .Append(context.Transforms.NormalizeMinMax("Features"));
            var transformedData = featurePipeline.Fit(dataframe).Transform(dataframe);
            var crossValidate   = context.Regression.CrossValidate(transformedData, context.Regression.Trainers.FastTreeTweedie(), numberOfFolds: 5);

            // Report the fold scores instead of computing and discarding them.
            var foldNumber = 0;
            foreach (var fold in crossValidate)
            {
                foldNumber++;
                Console.WriteLine($@"Cross-validation fold {foldNumber} RSquared : {fold.Metrics.RSquared}");
            }

            // Each entry trains one algorithm on the supplied rows, passes the fitted model to the
            // callback, and returns a prediction engine for it.
            var algorithms = new Dictionary <string, Func <IEnumerable <TaxiFare>, Action <ITransformer>, PredictionEngine <TaxiFare, TaxiFareRegression> > >()
            {
                { "SDCA", (data, action) => Regression.StochasticDoubleCoordinateAscent <TaxiFare, TaxiFareRegression>(data, additionModelAction: action) },
                { "LBFGS", (data, action) => Regression.LbfgsPoisson <TaxiFare, TaxiFareRegression>(data, additionModelAction: action) },
                { "FastTree", (data, action) => Regression.FastTree <TaxiFare, TaxiFareRegression>(data, additionModelAction: action) },
                { "FastTreeTweedie", (data, action) => Regression.FastTreeTweedie <TaxiFare, TaxiFareRegression>(data, additionModelAction: action) },
                { "FastForest", (data, action) => Regression.FastForest <TaxiFare, TaxiFareRegression>(data, additionModelAction: action) },
                { "GeneralizedAdditiveModel", (data, action) => Regression.GeneralizedAdditiveModel <TaxiFare, TaxiFareRegression>(data, additionModelAction: action) },
            };

            // The test view does not depend on the algorithm, so build it once.
            var testView = mlContext.Data.LoadFromEnumerable(testdata);

            foreach (var algorithm in algorithms)
            {
                PredictionEngine <TaxiFare, TaxiFareRegression> engine;
                ITransformer model = default;
                var          path  = $@"Regression_{algorithm.Key}.zip";

                if (!train && File.Exists(path))
                {
                    // Reuse the previously saved model; nothing new to persist.
                    model  = Global.LoadModel(path);
                    engine = mlContext.Model.CreatePredictionEngine <TaxiFare, TaxiFareRegression>(model);
                }
                else
                {
                    engine = algorithm.Value(traindata, mdl => model = mdl);
                    if (model == null)
                    {
                        throw new InvalidOperationException($@"Training {algorithm.Key} did not produce a model.");
                    }

                    // Only freshly trained models are written to disk.
                    MachineLearning.Global.SaveModel(model, path);
                }

                var metrics = Metrics.EvaluateRegressionModel(model, testView);
                MachineLearning.ConsoleHelper.ConsoleWriteHeader($@"Evaluate metrics for {algorithm.Key} algorithm.");
                foreach (var prop in metrics.GetType().GetProperties())
                {
                    Console.WriteLine($@"{prop.Name} : {prop.GetValue(metrics)}");
                }

                // Track the algorithm with the lowest mean squared error seen so far.
                if (metrics.MeanSquaredError < mse)
                {
                    mse     = metrics.MeanSquaredError;
                    bestAlg = algorithm.Key;
                }

                var predictedList = engine.Predict(testdata);
                VisualizeRegression(algorithm.Key, testdata, predictedList, metrics, $"{algorithm.Key}_reg.svg");
            }

            // Highlight the winner, then put the console colour back to what it was.
            var previousColor = Console.ForegroundColor;
            Console.ForegroundColor = ConsoleColor.Green;
            Console.WriteLine($@"Best algorithm based-on Mean Squared Error : {bestAlg}");
            Console.ForegroundColor = previousColor;
        }